; Home | History | Annotate | Download | only in neon   (source-browser page header; not part of the assembly source)
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_sad8x8_neon|
     13     EXPORT  |vp8_sad8x16_neon|
     14     EXPORT  |vp8_sad4x4_neon|
     15 
     16     ARM
     17     REQUIRE8
     18     PRESERVE8
     19 
     20     AREA ||.text||, CODE, READONLY, ALIGN=2
     21 ; unsigned int vp8_sad8x8_c(
     22 ;    unsigned char *src_ptr,
     23 ;    int  src_stride,
     24 ;    unsigned char *ref_ptr,
     25 ;    int  ref_stride)
     26 
     27 |vp8_sad8x8_neon| PROC
     28     vld1.8          {d0}, [r0], r1
     29     vld1.8          {d8}, [r2], r3
     30 
     31     vld1.8          {d2}, [r0], r1
     32     vld1.8          {d10}, [r2], r3
     33 
     34     vabdl.u8        q12, d0, d8
     35 
     36     vld1.8          {d4}, [r0], r1
     37     vld1.8          {d12}, [r2], r3
     38 
     39     vabal.u8        q12, d2, d10
     40 
     41     vld1.8          {d6}, [r0], r1
     42     vld1.8          {d14}, [r2], r3
     43 
     44     vabal.u8        q12, d4, d12
     45 
     46     vld1.8          {d0}, [r0], r1
     47     vld1.8          {d8}, [r2], r3
     48 
     49     vabal.u8        q12, d6, d14
     50 
     51     vld1.8          {d2}, [r0], r1
     52     vld1.8          {d10}, [r2], r3
     53 
     54     vabal.u8        q12, d0, d8
     55 
     56     vld1.8          {d4}, [r0], r1
     57     vld1.8          {d12}, [r2], r3
     58 
     59     vabal.u8        q12, d2, d10
     60 
     61     vld1.8          {d6}, [r0], r1
     62     vld1.8          {d14}, [r2], r3
     63 
     64     vabal.u8        q12, d4, d12
     65     vabal.u8        q12, d6, d14
     66 
     67     vpaddl.u16      q1, q12
     68     vpaddl.u32      q0, q1
     69     vadd.u32        d0, d0, d1
     70 
     71     vmov.32         r0, d0[0]
     72 
     73     bx              lr
     74 
     75     ENDP
     76 
     77 ;============================
     78 ;unsigned int vp8_sad8x16_c(
     79 ;    unsigned char *src_ptr,
     80 ;    int  src_stride,
     81 ;    unsigned char *ref_ptr,
     82 ;    int  ref_stride)
     83 
     84 |vp8_sad8x16_neon| PROC
     85     vld1.8          {d0}, [r0], r1
     86     vld1.8          {d8}, [r2], r3
     87 
     88     vld1.8          {d2}, [r0], r1
     89     vld1.8          {d10}, [r2], r3
     90 
     91     vabdl.u8        q12, d0, d8
     92 
     93     vld1.8          {d4}, [r0], r1
     94     vld1.8          {d12}, [r2], r3
     95 
     96     vabal.u8        q12, d2, d10
     97 
     98     vld1.8          {d6}, [r0], r1
     99     vld1.8          {d14}, [r2], r3
    100 
    101     vabal.u8        q12, d4, d12
    102 
    103     vld1.8          {d0}, [r0], r1
    104     vld1.8          {d8}, [r2], r3
    105 
    106     vabal.u8        q12, d6, d14
    107 
    108     vld1.8          {d2}, [r0], r1
    109     vld1.8          {d10}, [r2], r3
    110 
    111     vabal.u8        q12, d0, d8
    112 
    113     vld1.8          {d4}, [r0], r1
    114     vld1.8          {d12}, [r2], r3
    115 
    116     vabal.u8        q12, d2, d10
    117 
    118     vld1.8          {d6}, [r0], r1
    119     vld1.8          {d14}, [r2], r3
    120 
    121     vabal.u8        q12, d4, d12
    122 
    123     vld1.8          {d0}, [r0], r1
    124     vld1.8          {d8}, [r2], r3
    125 
    126     vabal.u8        q12, d6, d14
    127 
    128     vld1.8          {d2}, [r0], r1
    129     vld1.8          {d10}, [r2], r3
    130 
    131     vabal.u8        q12, d0, d8
    132 
    133     vld1.8          {d4}, [r0], r1
    134     vld1.8          {d12}, [r2], r3
    135 
    136     vabal.u8        q12, d2, d10
    137 
    138     vld1.8          {d6}, [r0], r1
    139     vld1.8          {d14}, [r2], r3
    140 
    141     vabal.u8        q12, d4, d12
    142 
    143     vld1.8          {d0}, [r0], r1
    144     vld1.8          {d8}, [r2], r3
    145 
    146     vabal.u8        q12, d6, d14
    147 
    148     vld1.8          {d2}, [r0], r1
    149     vld1.8          {d10}, [r2], r3
    150 
    151     vabal.u8        q12, d0, d8
    152 
    153     vld1.8          {d4}, [r0], r1
    154     vld1.8          {d12}, [r2], r3
    155 
    156     vabal.u8        q12, d2, d10
    157 
    158     vld1.8          {d6}, [r0], r1
    159     vld1.8          {d14}, [r2], r3
    160 
    161     vabal.u8        q12, d4, d12
    162     vabal.u8        q12, d6, d14
    163 
    164     vpaddl.u16      q1, q12
    165     vpaddl.u32      q0, q1
    166     vadd.u32        d0, d0, d1
    167 
    168     vmov.32         r0, d0[0]
    169 
    170     bx              lr
    171 
    172     ENDP
    173 
    174 ;===========================
    175 ;unsigned int vp8_sad4x4_c(
    176 ;    unsigned char *src_ptr,
    177 ;    int  src_stride,
    178 ;    unsigned char *ref_ptr,
    179 ;    int  ref_stride)
    180 
    181 |vp8_sad4x4_neon| PROC
    182     vld1.8          {d0}, [r0], r1
    183     vld1.8          {d8}, [r2], r3
    184 
    185     vld1.8          {d2}, [r0], r1
    186     vld1.8          {d10}, [r2], r3
    187 
    188     vabdl.u8        q12, d0, d8
    189 
    190     vld1.8          {d4}, [r0], r1
    191     vld1.8          {d12}, [r2], r3
    192 
    193     vabal.u8        q12, d2, d10
    194 
    195     vld1.8          {d6}, [r0], r1
    196     vld1.8          {d14}, [r2], r3
    197 
    198     vabal.u8        q12, d4, d12
    199     vabal.u8        q12, d6, d14
    200 
    201     vpaddl.u16      d1, d24
    202     vpaddl.u32      d0, d1
    203     vmov.32         r0, d0[0]
    204 
    205     bx              lr
    206 
    207     ENDP
    208 
    209     END
    210