Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_sad16x16_armv6|
     13 
     14     ARM
     15     REQUIRE8
     16     PRESERVE8
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2
      19 ; vp8_sad16x16_armv6: sum of absolute differences (SAD) over 16 rows of 16 bytes of src vs. ref; result returned in r0
      20 ; r0    const unsigned char *src_ptr   (advanced by src_stride after each row)
      21 ; r1    int  src_stride                (added to r0 once per row)
      22 ; r2    const unsigned char *ref_ptr   (advanced by ref_stride after each row)
      23 ; r3    int  ref_stride                (added to r2 once per row)
      24 ; stack max_sad (not used: the routine never reads its stack argument)
      25 |vp8_sad16x16_armv6| PROC
      26     stmfd   sp!, {r4-r12, lr}   ; save r4-r12 and lr (10 words); restored by the ldmfd at the end
      27     ; prefetch the rows at one and two strides past the starting src/ref rows
      28     pld     [r0, r1, lsl #0]
      29     pld     [r2, r3, lsl #0]
      30     pld     [r0, r1, lsl #1]
      31     pld     [r2, r3, lsl #1]
      32 
      33     mov     r4, #0              ; r4 = running SAD total, starts at 0
      34     mov     r5, #8              ; r5 = loop count: 8 iterations x 2 rows each = 16 rows
      35     ; NOTE(review): rows are read with word loads (ldr); presumably unaligned word access is permitted on the target - confirm
      36 loop
      37     ; 1st row ("A" words are bytes 0-7 of the row, "B" words are bytes 8-15)
      38     ldr     r6, [r0, #0x0]      ; load 4 src pixels (1A)
      39     ldr     r8, [r2, #0x0]      ; load 4 ref pixels (1A)
      40     ldr     r7, [r0, #0x4]      ; load 4 src pixels (1A)
      41     ldr     r9, [r2, #0x4]      ; load 4 ref pixels (1A)
      42     ldr     r10, [r0, #0x8]     ; load 4 src pixels (1B)
      43     ldr     r11, [r0, #0xC]     ; load 4 src pixels (1B)
      44 
      45     usada8  r4, r8, r6, r4      ; r4 += sad of bytes 0-3
      46     usad8   r8, r7, r9          ; r8 = sad of bytes 4-7 (the ref word in r8 was consumed by the line above)
      47 
      48     ldr     r12, [r2, #0x8]     ; load 4 ref pixels (1B)
      49     ldr     lr, [r2, #0xC]      ; load 4 ref pixels (1B); lr is free as scratch because it was saved on entry
      50 
      51     add     r0, r0, r1          ; set src pointer to next row
      52     add     r2, r2, r3          ; set ref pointer to next row
      53     ; prefetch two strides past the rows the pointers now address
      54     pld     [r0, r1, lsl #1]
      55     pld     [r2, r3, lsl #1]
      56 
      57     usada8  r4, r10, r12, r4    ; r4 += sad of bytes 8-11
      58     usada8  r8, r11, lr, r8     ; r8 += sad of bytes 12-15
      59 
      60     ldr     r6, [r0, #0x0]      ; load 4 src pixels (2A)
      61     ldr     r7, [r0, #0x4]      ; load 4 src pixels (2A)
      62     add     r4, r4, r8          ; fold the row-1 partial into the total before r8 is reloaded below
      63 
      64     ; 2nd row (its first two src words were already loaded above)
      65     ldr     r8, [r2, #0x0]      ; load 4 ref pixels (2A)
      66     ldr     r9, [r2, #0x4]      ; load 4 ref pixels (2A)
      67     ldr     r10, [r0, #0x8]     ; load 4 src pixels (2B)
      68     ldr     r11, [r0, #0xC]     ; load 4 src pixels (2B)
      69 
      70     usada8  r4, r6, r8, r4      ; r4 += sad of bytes 0-3
      71     usad8   r8, r7, r9          ; r8 = sad of bytes 4-7 (r8 reused as the second accumulator)
      72 
      73     ldr     r12, [r2, #0x8]     ; load 4 ref pixels (2B)
      74     ldr     lr, [r2, #0xC]      ; load 4 ref pixels (2B)
      75 
      76     add     r0, r0, r1          ; set src pointer to next row
      77     add     r2, r2, r3          ; set ref pointer to next row
      78 
      79     usada8  r4, r10, r12, r4    ; r4 += sad of bytes 8-11
      80     usada8  r8, r11, lr, r8     ; r8 += sad of bytes 12-15
      81     ; prefetch two strides past the rows the pointers now address
      82     pld     [r0, r1, lsl #1]
      83     pld     [r2, r3, lsl #1]
      84 
      85     subs    r5, r5, #1          ; decrement loop counter and set the flags tested by bne
      86     add     r4, r4, r8          ; add partial sad values (plain add, so the flags from subs are untouched)
      87 
      88     bne     loop                ; repeat until all 8 row pairs have been processed
      89 
      90     mov     r0, r4              ; return sad
      91     ldmfd   sp!, {r4-r12, pc}   ; restore r4-r12 and return by loading the saved lr into pc
      92 
      93     ENDP
     94 
     95     END
     96 
     97