Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_variance_halfpixvar16x16_hv_armv6|
     13 
     14     ARM
     15     REQUIRE8
     16     PRESERVE8
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2
     19 
     20 ; r0    unsigned char *src_ptr
     21 ; r1    int source_stride
     22 ; r2    unsigned char *ref_ptr
     23 ; r3    int  recon_stride
     24 ; stack unsigned int *sse
     25 |vp8_variance_halfpixvar16x16_hv_armv6| PROC
     26 
     27     stmfd   sp!, {r4-r12, lr}
     28     mov     r8, #0              ; initialize sum = 0
     29     ldr     r10, c80808080
     30     mov     r11, #0             ; initialize sse = 0
     31     mov     r12, #16            ; set loop counter to 16 (=block height)
     32     mov     lr, #0              ; constant zero
     33 loop
     34     add     r9, r0, r1          ; pointer to pixels on the next row
     35     ; 1st 4 pixels
     36     ldr     r4, [r0, #0]        ; load source pixels a, row N
     37     ldr     r6, [r0, #1]        ; load source pixels b, row N
     38     ldr     r5, [r9, #0]        ; load source pixels c, row N+1
     39     ldr     r7, [r9, #1]        ; load source pixels d, row N+1
     40 
     41     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
     42     mvn     r6, r6
     43     uhsub8  r4, r4, r6
     44     eor     r4, r4, r10
     45     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
     46     mvn     r7, r7
     47     uhsub8  r5, r5, r7
     48     eor     r5, r5, r10
     49     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
     50     mvn     r5, r5
     51     uhsub8  r4, r4, r5
     52     ldr     r5, [r2, #0]        ; load 4 ref pixels
     53     eor     r4, r4, r10
     54 
     55     usub8   r6, r4, r5          ; calculate difference
     56     sel     r7, r6, lr          ; select bytes with positive difference
     57     usub8   r6, r5, r4          ; calculate difference with reversed operands
     58     sel     r6, r6, lr          ; select bytes with negative difference
     59 
     60     ; calculate partial sums
     61     usad8   r4, r7, lr          ; calculate sum of positive differences
     62     usad8   r5, r6, lr          ; calculate sum of negative differences
     63     orr     r6, r6, r7          ; differences of all 4 pixels
     64     ; calculate total sum
     65     adds    r8, r8, r4          ; add positive differences to sum
     66     subs    r8, r8, r5          ; substract negative differences from sum
     67 
     68     ; calculate sse
     69     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     70     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     71     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     72 
     73     ; 2nd 4 pixels
     74     ldr     r4, [r0, #4]        ; load source pixels a, row N
     75     ldr     r6, [r0, #5]        ; load source pixels b, row N
     76     ldr     r5, [r9, #4]        ; load source pixels c, row N+1
     77 
     78     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
     79 
     80     ldr     r7, [r9, #5]        ; load source pixels d, row N+1
     81 
     82     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
     83     mvn     r6, r6
     84     uhsub8  r4, r4, r6
     85     eor     r4, r4, r10
     86     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
     87     mvn     r7, r7
     88     uhsub8  r5, r5, r7
     89     eor     r5, r5, r10
     90     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
     91     mvn     r5, r5
     92     uhsub8  r4, r4, r5
     93     ldr     r5, [r2, #4]        ; load 4 ref pixels
     94     eor     r4, r4, r10
     95 
     96     usub8   r6, r4, r5          ; calculate difference
     97     sel     r7, r6, lr          ; select bytes with positive difference
     98     usub8   r6, r5, r4          ; calculate difference with reversed operands
     99     sel     r6, r6, lr          ; select bytes with negative difference
    100 
    101     ; calculate partial sums
    102     usad8   r4, r7, lr          ; calculate sum of positive differences
    103     usad8   r5, r6, lr          ; calculate sum of negative differences
    104     orr     r6, r6, r7          ; differences of all 4 pixels
    105 
    106     ; calculate total sum
    107     add     r8, r8, r4          ; add positive differences to sum
    108     sub     r8, r8, r5          ; substract negative differences from sum
    109 
    110     ; calculate sse
    111     uxtb16  r5, r6              ; byte (two pixels) to halfwords
    112     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    113     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    114 
    115     ; 3rd 4 pixels
    116     ldr     r4, [r0, #8]        ; load source pixels a, row N
    117     ldr     r6, [r0, #9]        ; load source pixels b, row N
    118     ldr     r5, [r9, #8]        ; load source pixels c, row N+1
    119 
    120     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
    121 
    122     ldr     r7, [r9, #9]        ; load source pixels d, row N+1
    123 
    124     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    125     mvn     r6, r6
    126     uhsub8  r4, r4, r6
    127     eor     r4, r4, r10
    128     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    129     mvn     r7, r7
    130     uhsub8  r5, r5, r7
    131     eor     r5, r5, r10
    132     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    133     mvn     r5, r5
    134     uhsub8  r4, r4, r5
    135     ldr     r5, [r2, #8]        ; load 4 ref pixels
    136     eor     r4, r4, r10
    137 
    138     usub8   r6, r4, r5          ; calculate difference
    139     sel     r7, r6, lr          ; select bytes with positive difference
    140     usub8   r6, r5, r4          ; calculate difference with reversed operands
    141     sel     r6, r6, lr          ; select bytes with negative difference
    142 
    143     ; calculate partial sums
    144     usad8   r4, r7, lr          ; calculate sum of positive differences
    145     usad8   r5, r6, lr          ; calculate sum of negative differences
    146     orr     r6, r6, r7          ; differences of all 4 pixels
    147 
    148     ; calculate total sum
    149     add     r8, r8, r4          ; add positive differences to sum
    150     sub     r8, r8, r5          ; substract negative differences from sum
    151 
    152     ; calculate sse
    153     uxtb16  r5, r6              ; byte (two pixels) to halfwords
    154     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    155     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    156 
    157     ; 4th 4 pixels
    158     ldr     r4, [r0, #12]       ; load source pixels a, row N
    159     ldr     r6, [r0, #13]       ; load source pixels b, row N
    160     ldr     r5, [r9, #12]       ; load source pixels c, row N+1
    161     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
    162     ldr     r7, [r9, #13]       ; load source pixels d, row N+1
    163 
    164     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    165     mvn     r6, r6
    166     uhsub8  r4, r4, r6
    167     eor     r4, r4, r10
    168     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    169     mvn     r7, r7
    170     uhsub8  r5, r5, r7
    171     eor     r5, r5, r10
    172     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    173     mvn     r5, r5
    174     uhsub8  r4, r4, r5
    175     ldr     r5, [r2, #12]       ; load 4 ref pixels
    176     eor     r4, r4, r10
    177 
    178     usub8   r6, r4, r5          ; calculate difference
    179     add     r0, r0, r1          ; set src_ptr to next row
    180     sel     r7, r6, lr          ; select bytes with positive difference
    181     usub8   r6, r5, r4          ; calculate difference with reversed operands
    182     add     r2, r2, r3          ; set dst_ptr to next row
    183     sel     r6, r6, lr          ; select bytes with negative difference
    184 
    185     ; calculate partial sums
    186     usad8   r4, r7, lr          ; calculate sum of positive differences
    187     usad8   r5, r6, lr          ; calculate sum of negative differences
    188     orr     r6, r6, r7          ; differences of all 4 pixels
    189 
    190     ; calculate total sum
    191     add     r8, r8, r4          ; add positive differences to sum
    192     sub     r8, r8, r5          ; substract negative differences from sum
    193 
    194     ; calculate sse
    195     uxtb16  r5, r6              ; byte (two pixels) to halfwords
    196     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    197     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    198     subs    r12, r12, #1
    199     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
    200 
    201     bne     loop
    202 
    203     ; return stuff
    204     ldr     r6, [sp, #40]       ; get address of sse
    205     mul     r0, r8, r8          ; sum * sum
    206     str     r11, [r6]           ; store sse
    207     sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
    208 
    209     ldmfd   sp!, {r4-r12, pc}
    210 
    211     ENDP
    212 
    213 c80808080
    214     DCD     0x80808080
    215 
    216     END
    217