Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_variance_halfpixvar16x16_h_armv6|
     13 
     14     ARM
     15     REQUIRE8
     16     PRESERVE8
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2
     19 
     20 ; r0    unsigned char *src_ptr
     21 ; r1    int source_stride
     22 ; r2    unsigned char *ref_ptr
     23 ; r3    int  recon_stride
     24 ; stack unsigned int *sse
     25 |vp8_variance_halfpixvar16x16_h_armv6| PROC
     26 
     27     stmfd   sp!, {r4-r12, lr}
     28     mov     r8, #0              ; initialize sum = 0
     29     ldr     r10, c80808080
     30     mov     r11, #0             ; initialize sse = 0
     31     mov     r12, #16            ; set loop counter to 16 (=block height)
     32     mov     lr, #0              ; constant zero
     33 loop
     34     ; 1st 4 pixels
     35     ldr     r4, [r0, #0]        ; load 4 src pixels
     36     ldr     r6, [r0, #1]        ; load 4 src pixels with 1 byte offset
     37     ldr     r5, [r2, #0]        ; load 4 ref pixels
     38 
     39     ; bilinear interpolation
     40     mvn     r6, r6
     41     uhsub8  r4, r4, r6
     42     eor     r4, r4, r10
     43 
     44     usub8   r6, r4, r5          ; calculate difference
     45     sel     r7, r6, lr          ; select bytes with positive difference
     46     usub8   r6, r5, r4          ; calculate difference with reversed operands
     47     sel     r6, r6, lr          ; select bytes with negative difference
     48 
     49     ; calculate partial sums
     50     usad8   r4, r7, lr          ; calculate sum of positive differences
     51     usad8   r5, r6, lr          ; calculate sum of negative differences
     52     orr     r6, r6, r7          ; differences of all 4 pixels
     53     ; calculate total sum
     54     adds    r8, r8, r4          ; add positive differences to sum
     55     subs    r8, r8, r5          ; substract negative differences from sum
     56 
     57     ; calculate sse
     58     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     59     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     60     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     61 
     62     ; 2nd 4 pixels
     63     ldr     r4, [r0, #4]        ; load 4 src pixels
     64     ldr     r6, [r0, #5]        ; load 4 src pixels with 1 byte offset
     65     ldr     r5, [r2, #4]        ; load 4 ref pixels
     66 
     67     ; bilinear interpolation
     68     mvn     r6, r6
     69     uhsub8  r4, r4, r6
     70     eor     r4, r4, r10
     71 
     72     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
     73 
     74     usub8   r6, r4, r5          ; calculate difference
     75     sel     r7, r6, lr          ; select bytes with positive difference
     76     usub8   r6, r5, r4          ; calculate difference with reversed operands
     77     sel     r6, r6, lr          ; select bytes with negative difference
     78 
     79     ; calculate partial sums
     80     usad8   r4, r7, lr          ; calculate sum of positive differences
     81     usad8   r5, r6, lr          ; calculate sum of negative differences
     82     orr     r6, r6, r7          ; differences of all 4 pixels
     83 
     84     ; calculate total sum
     85     add     r8, r8, r4          ; add positive differences to sum
     86     sub     r8, r8, r5          ; substract negative differences from sum
     87 
     88     ; calculate sse
     89     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     90     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     91     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     92 
     93     ; 3rd 4 pixels
     94     ldr     r4, [r0, #8]        ; load 4 src pixels
     95     ldr     r6, [r0, #9]        ; load 4 src pixels with 1 byte offset
     96     ldr     r5, [r2, #8]        ; load 4 ref pixels
     97 
     98     ; bilinear interpolation
     99     mvn     r6, r6
    100     uhsub8  r4, r4, r6
    101     eor     r4, r4, r10
    102 
    103     smlad   r11, r7, r7, r11  ; dual signed multiply, add and accumulate (2)
    104 
    105     usub8   r6, r4, r5          ; calculate difference
    106     sel     r7, r6, lr          ; select bytes with positive difference
    107     usub8   r6, r5, r4          ; calculate difference with reversed operands
    108     sel     r6, r6, lr          ; select bytes with negative difference
    109 
    110     ; calculate partial sums
    111     usad8   r4, r7, lr          ; calculate sum of positive differences
    112     usad8   r5, r6, lr          ; calculate sum of negative differences
    113     orr     r6, r6, r7          ; differences of all 4 pixels
    114 
    115     ; calculate total sum
    116     add     r8, r8, r4          ; add positive differences to sum
    117     sub     r8, r8, r5          ; substract negative differences from sum
    118 
    119     ; calculate sse
    120     uxtb16  r5, r6              ; byte (two pixels) to halfwords
    121     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    122     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    123 
    124     ; 4th 4 pixels
    125     ldr     r4, [r0, #12]       ; load 4 src pixels
    126     ldr     r6, [r0, #13]       ; load 4 src pixels with 1 byte offset
    127     ldr     r5, [r2, #12]       ; load 4 ref pixels
    128 
    129     ; bilinear interpolation
    130     mvn     r6, r6
    131     uhsub8  r4, r4, r6
    132     eor     r4, r4, r10
    133 
    134     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
    135 
    136     usub8   r6, r4, r5          ; calculate difference
    137     add     r0, r0, r1          ; set src_ptr to next row
    138     sel     r7, r6, lr          ; select bytes with positive difference
    139     usub8   r6, r5, r4          ; calculate difference with reversed operands
    140     add     r2, r2, r3          ; set dst_ptr to next row
    141     sel     r6, r6, lr          ; select bytes with negative difference
    142 
    143     ; calculate partial sums
    144     usad8   r4, r7, lr          ; calculate sum of positive differences
    145     usad8   r5, r6, lr          ; calculate sum of negative differences
    146     orr     r6, r6, r7          ; differences of all 4 pixels
    147 
    148     ; calculate total sum
    149     add     r8, r8, r4          ; add positive differences to sum
    150     sub     r8, r8, r5          ; substract negative differences from sum
    151 
    152     ; calculate sse
    153     uxtb16  r5, r6              ; byte (two pixels) to halfwords
    154     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    155     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    156     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
    157 
    158     subs    r12, r12, #1
    159 
    160     bne     loop
    161 
    162     ; return stuff
    163     ldr     r6, [sp, #40]       ; get address of sse
    164     mul     r0, r8, r8          ; sum * sum
    165     str     r11, [r6]           ; store sse
    166     sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
    167 
    168     ldmfd   sp!, {r4-r12, pc}
    169 
    170     ENDP
    171 
    172 c80808080
    173     DCD     0x80808080
    174 
    175     END
    176 
    177