;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

     12     EXPORT  |vp8_variance16x16_armv6|
     13 
     14     ARM
     15     REQUIRE8
     16     PRESERVE8
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2
     19 
     20 ; r0    unsigned char *src_ptr
     21 ; r1    int source_stride
     22 ; r2    unsigned char *ref_ptr
     23 ; r3    int  recon_stride
     24 ; stack unsigned int *sse
     25 |vp8_variance16x16_armv6| PROC
     26 
     27     stmfd   sp!, {r4-r12, lr}
     28 
     29     pld     [r0, r1, lsl #0]
     30     pld     [r2, r3, lsl #0]
     31 
     32     mov     r8, #0              ; initialize sum = 0
     33     mov     r11, #0             ; initialize sse = 0
     34     mov     r12, #16            ; set loop counter to 16 (=block height)
     35 
     36 loop
     37     ; 1st 4 pixels
     38     ldr     r4, [r0, #0]        ; load 4 src pixels
     39     ldr     r5, [r2, #0]        ; load 4 ref pixels
     40 
     41     mov     lr, #0              ; constant zero
     42 
     43     usub8   r6, r4, r5          ; calculate difference
     44     pld     [r0, r1, lsl #1]
     45     sel     r7, r6, lr          ; select bytes with positive difference
     46     usub8   r9, r5, r4          ; calculate difference with reversed operands
     47     pld     [r2, r3, lsl #1]
     48     sel     r6, r9, lr          ; select bytes with negative difference
     49 
     50     ; calculate partial sums
     51     usad8   r4, r7, lr          ; calculate sum of positive differences
     52     usad8   r5, r6, lr          ; calculate sum of negative differences
     53     orr     r6, r6, r7          ; differences of all 4 pixels
     54     ; calculate total sum
     55     adds    r8, r8, r4          ; add positive differences to sum
     56     subs    r8, r8, r5          ; subtract negative differences from sum
     57 
     58     ; calculate sse
     59     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     60     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
     61     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     62 
     63     ; 2nd 4 pixels
     64     ldr     r4, [r0, #4]        ; load 4 src pixels
     65     ldr     r5, [r2, #4]        ; load 4 ref pixels
     66     smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)
     67 
     68     usub8   r6, r4, r5          ; calculate difference
     69     sel     r7, r6, lr          ; select bytes with positive difference
     70     usub8   r9, r5, r4          ; calculate difference with reversed operands
     71     sel     r6, r9, lr          ; select bytes with negative difference
     72 
     73     ; calculate partial sums
     74     usad8   r4, r7, lr          ; calculate sum of positive differences
     75     usad8   r5, r6, lr          ; calculate sum of negative differences
     76     orr     r6, r6, r7          ; differences of all 4 pixels
     77 
     78     ; calculate total sum
     79     add     r8, r8, r4          ; add positive differences to sum
     80     sub     r8, r8, r5          ; subtract negative differences from sum
     81 
     82     ; calculate sse
     83     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     84     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
     85     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     86 
     87     ; 3rd 4 pixels
     88     ldr     r4, [r0, #8]        ; load 4 src pixels
     89     ldr     r5, [r2, #8]        ; load 4 ref pixels
     90     smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)
     91 
     92     usub8   r6, r4, r5          ; calculate difference
     93     sel     r7, r6, lr          ; select bytes with positive difference
     94     usub8   r9, r5, r4          ; calculate difference with reversed operands
     95     sel     r6, r9, lr          ; select bytes with negative difference
     96 
     97     ; calculate partial sums
     98     usad8   r4, r7, lr          ; calculate sum of positive differences
     99     usad8   r5, r6, lr          ; calculate sum of negative differences
    100     orr     r6, r6, r7          ; differences of all 4 pixels
    101 
    102     ; calculate total sum
    103     add     r8, r8, r4          ; add positive differences to sum
    104     sub     r8, r8, r5          ; subtract negative differences from sum
    105 
    106     ; calculate sse
    107     uxtb16  r5, r6              ; byte (two pixels) to halfwords
    108     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
    109     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    110 
    111     ; 4th 4 pixels
    112     ldr     r4, [r0, #12]       ; load 4 src pixels
    113     ldr     r5, [r2, #12]       ; load 4 ref pixels
    114     smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)
    115 
    116     usub8   r6, r4, r5          ; calculate difference
    117     add     r0, r0, r1          ; set src_ptr to next row
    118     sel     r7, r6, lr          ; select bytes with positive difference
    119     usub8   r9, r5, r4          ; calculate difference with reversed operands
    120     add     r2, r2, r3          ; set dst_ptr to next row
    121     sel     r6, r9, lr          ; select bytes with negative difference
    122 
    123     ; calculate partial sums
    124     usad8   r4, r7, lr          ; calculate sum of positive differences
    125     usad8   r5, r6, lr          ; calculate sum of negative differences
    126     orr     r6, r6, r7          ; differences of all 4 pixels
    127 
    128     ; calculate total sum
    129     add     r8, r8, r4          ; add positive differences to sum
    130     sub     r8, r8, r5          ; subtract negative differences from sum
    131 
    132     ; calculate sse
    133     uxtb16  r5, r6              ; byte (two pixels) to halfwords
    134     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
    135     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    136     smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)
    137 
    138 
    139     subs    r12, r12, #1
    140 
    141     bne     loop
    142 
    143     ; return stuff
    144     ldr     r6, [sp, #40]       ; get address of sse
    145     mul     r0, r8, r8          ; sum * sum
    146     str     r11, [r6]           ; store sse
    147     sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
    148 
    149     ldmfd   sp!, {r4-r12, pc}
    150 
    151     ENDP
    152 
    153     END
    154 
    155