Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_variance_halfpixvar16x16_h_armv6|     ; make the routine's symbol visible to other object files
     13 
     14     ARM                         ; assemble what follows as ARM (32-bit) instructions
     15     REQUIRE8                    ; this file requires 8-byte alignment of the stack
     16     PRESERVE8                   ; this file preserves 8-byte alignment of the stack
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2    ; read-only code area named .text, aligned to 2^2 = 4 bytes
     19 
     20 ; r0    unsigned char *src_ptr   (bytes 0..16 of each row are read)
     21 ; r1    int source_stride        (added to src_ptr after each row)
     22 ; r2    unsigned char *ref_ptr   (bytes 0..15 of each row are read)
     23 ; r3    int  recon_stride        (added to ref_ptr after each row)
     24 ; stack unsigned int *sse        (receives the sum of squared differences)
        ;
        ; Compares a 16x16 block of horizontally interpolated source pixels with
        ; a 16x16 reference block.  Each interpolated pixel is the rounded average
        ; of a source pixel and its right-hand neighbour:
        ;     (src[x] + src[x + 1] + 1) >> 1
        ;
        ; Over all 256 pixels the loop accumulates
        ;     sum (r8)  = sum of (interpolated - ref), signed
        ;     sse (r11) = sum of (interpolated - ref)^2
        ; and the routine stores sse to *sse and returns
        ;     r0 = sse - ((sum * sum) >> 8)
        ;
        ; Register roles: r4-r7 scratch, r8 sum, r10 0x80808080, r11 sse,
        ; r12 rows remaining, lr constant zero.  r9 is saved and restored but
        ; never referenced.
        ;
        ; NOTE(review): each pixel group loads a word at [r0, #k] and another at
        ; [r0, #k + 1], so at least one of the two is not word-aligned; this
        ; presumably relies on unaligned word loads being enabled - confirm for
        ; the target.
     25 |vp8_variance_halfpixvar16x16_h_armv6| PROC
     26 
     27     stmfd   sp!, {r4-r12, lr}   ; push 10 registers (40 bytes)
     28 
     29     pld     [r0, r1, lsl #0]    ; preload hint for src_ptr + source_stride
     30     pld     [r2, r3, lsl #0]    ; preload hint for ref_ptr + recon_stride
     31 
     32     mov     r8, #0              ; initialize sum = 0
     33     ldr     r10, c80808080      ; r10 = 0x80808080 (literal word defined after ENDP)
     34     mov     r11, #0             ; initialize sse = 0
     35     mov     r12, #16            ; set loop counter to 16 (=block height)
     36     mov     lr, #0              ; constant zero
     37 loop
            ; One iteration = one row, handled as four groups of 4 pixels.
            ; In each group usub8 sets the per-byte GE flags and the following sel
            ; keeps only the bytes whose subtraction did not borrow (the others are
            ; taken from lr, i.e. zero), so r7 and r6 end up holding the positive
            ; and the negative differences as unsigned byte magnitudes.
     38     ; 1st 4 pixels
     39     ldr     r4, [r0, #0]        ; load 4 src pixels
     40     ldr     r6, [r0, #1]        ; load 4 src pixels with 1 byte offset
     41     ldr     r5, [r2, #0]        ; load 4 ref pixels
     42 
     43     ; bilinear interpolation: per byte r4 = (a + b + 1) >> 1, a from r4, b from r6
     44     mvn     r6, r6              ; each byte b becomes 255 - b
     45     uhsub8  r4, r4, r6          ; per byte (a - (255 - b)) >> 1, i.e. ((a + b + 1) >> 1) - 128
     46     eor     r4, r4, r10         ; flip bit 7 of each byte, which adds the 128 back
     47 
     48     usub8   r6, r4, r5          ; interpolated - ref per byte; sets GE flags
     49     pld     [r0, r1, lsl #1]    ; preload hint for src_ptr + 2 * source_stride
     50     sel     r7, r6, lr          ; select bytes with positive difference
     51     usub8   r6, r5, r4          ; ref - interpolated per byte; sets GE flags
     52     pld     [r2, r3, lsl #1]    ; preload hint for ref_ptr + 2 * recon_stride
     53     sel     r6, r6, lr          ; select bytes with negative difference
     54 
     55     ; calculate partial sums
     56     usad8   r4, r7, lr          ; calculate sum of positive differences
     57     usad8   r5, r6, lr          ; calculate sum of negative differences
     58     orr     r6, r6, r7          ; absolute differences of all 4 pixels (each byte is nonzero in at most one of r6, r7)
     59     ; calculate total sum
     60     adds    r8, r8, r4          ; add positive differences to sum (flags set here are not read; groups 2-4 use add)
     61     subs    r8, r8, r5          ; subtract negative differences from sum (flags set here are not read; groups 2-4 use sub)
     62 
     63     ; calculate sse
     64     uxtb16  r5, r6              ; bytes 0 and 2 of r6 zero-extended to halfwords
     65     uxtb16  r7, r6, ror #8      ; bytes 1 and 3 of r6 zero-extended to halfwords
     66     smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2
     67 
     68     ; 2nd 4 pixels
     69     ldr     r4, [r0, #4]        ; load 4 src pixels
     70     ldr     r6, [r0, #5]        ; load 4 src pixels with 1 byte offset
     71     ldr     r5, [r2, #4]        ; load 4 ref pixels
     72 
     73     ; bilinear interpolation (same three-instruction sequence as the 1st group)
     74     mvn     r6, r6
     75     uhsub8  r4, r4, r6
     76     eor     r4, r4, r10
     77 
     78     smlad   r11, r7, r7, r11    ; sse += d1*d1 + d3*d3 of the 1st group (r7 is still live from there)
     79 
     80     usub8   r6, r4, r5          ; interpolated - ref per byte; sets GE flags
     81     sel     r7, r6, lr          ; select bytes with positive difference
     82     usub8   r6, r5, r4          ; ref - interpolated per byte; sets GE flags
     83     sel     r6, r6, lr          ; select bytes with negative difference
     84 
     85     ; calculate partial sums
     86     usad8   r4, r7, lr          ; calculate sum of positive differences
     87     usad8   r5, r6, lr          ; calculate sum of negative differences
     88     orr     r6, r6, r7          ; absolute differences of all 4 pixels
     89 
     90     ; calculate total sum
     91     add     r8, r8, r4          ; add positive differences to sum
     92     sub     r8, r8, r5          ; subtract negative differences from sum
     93 
     94     ; calculate sse
     95     uxtb16  r5, r6              ; bytes 0 and 2 of r6 zero-extended to halfwords
     96     uxtb16  r7, r6, ror #8      ; bytes 1 and 3 of r6 zero-extended to halfwords
     97     smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2
     98 
     99     ; 3rd 4 pixels
    100     ldr     r4, [r0, #8]        ; load 4 src pixels
    101     ldr     r6, [r0, #9]        ; load 4 src pixels with 1 byte offset
    102     ldr     r5, [r2, #8]        ; load 4 ref pixels
    103 
    104     ; bilinear interpolation (same three-instruction sequence as the 1st group)
    105     mvn     r6, r6
    106     uhsub8  r4, r4, r6
    107     eor     r4, r4, r10
    108 
    109     smlad   r11, r7, r7, r11  ; sse += d1*d1 + d3*d3 of the 2nd group (r7 is still live from there)
    110 
    111     usub8   r6, r4, r5          ; interpolated - ref per byte; sets GE flags
    112     sel     r7, r6, lr          ; select bytes with positive difference
    113     usub8   r6, r5, r4          ; ref - interpolated per byte; sets GE flags
    114     sel     r6, r6, lr          ; select bytes with negative difference
    115 
    116     ; calculate partial sums
    117     usad8   r4, r7, lr          ; calculate sum of positive differences
    118     usad8   r5, r6, lr          ; calculate sum of negative differences
    119     orr     r6, r6, r7          ; absolute differences of all 4 pixels
    120 
    121     ; calculate total sum
    122     add     r8, r8, r4          ; add positive differences to sum
    123     sub     r8, r8, r5          ; subtract negative differences from sum
    124 
    125     ; calculate sse
    126     uxtb16  r5, r6              ; bytes 0 and 2 of r6 zero-extended to halfwords
    127     uxtb16  r7, r6, ror #8      ; bytes 1 and 3 of r6 zero-extended to halfwords
    128     smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2
    129 
    130     ; 4th 4 pixels
    131     ldr     r4, [r0, #12]       ; load 4 src pixels
    132     ldr     r6, [r0, #13]       ; load 4 src pixels with 1 byte offset (reads up to byte 16 of the row)
    133     ldr     r5, [r2, #12]       ; load 4 ref pixels
    134 
    135     ; bilinear interpolation (same three-instruction sequence as the 1st group)
    136     mvn     r6, r6
    137     uhsub8  r4, r4, r6
    138     eor     r4, r4, r10
    139 
    140     smlad   r11, r7, r7, r11    ; sse += d1*d1 + d3*d3 of the 3rd group (r7 is still live from there)
    141 
    142     usub8   r6, r4, r5          ; interpolated - ref per byte; sets GE flags
    143     add     r0, r0, r1          ; advance src_ptr to the next row (does not touch the GE flags)
    144     sel     r7, r6, lr          ; select bytes with positive difference
    145     usub8   r6, r5, r4          ; ref - interpolated per byte; sets GE flags
    146     add     r2, r2, r3          ; advance ref_ptr to the next row (does not touch the GE flags)
    147     sel     r6, r6, lr          ; select bytes with negative difference
    148 
    149     ; calculate partial sums
    150     usad8   r4, r7, lr          ; calculate sum of positive differences
    151     usad8   r5, r6, lr          ; calculate sum of negative differences
    152     orr     r6, r6, r7          ; absolute differences of all 4 pixels
    153 
    154     ; calculate total sum
    155     add     r8, r8, r4          ; add positive differences to sum
    156     sub     r8, r8, r5          ; subtract negative differences from sum
    157 
    158     ; calculate sse
    159     uxtb16  r5, r6              ; bytes 0 and 2 of r6 zero-extended to halfwords
    160     uxtb16  r7, r6, ror #8      ; bytes 1 and 3 of r6 zero-extended to halfwords
    161     smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2
    162     smlad   r11, r7, r7, r11    ; sse += d1*d1 + d3*d3 (last group, so not deferred)
    163 
    164     subs    r12, r12, #1        ; one row done; sets Z when no rows remain
    165 
    166     bne     loop                ; repeat until all 16 rows are processed
    167 
    168     ; return: store sse through the pointer argument, result in r0
    169     ldr     r6, [sp, #40]       ; get address of sse (stack argument, just above the 40 bytes pushed on entry)
    170     mul     r0, r8, r8          ; sum * sum
    171     str     r11, [r6]           ; store sse
    172     sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8)), logical shift
    173 
    174     ldmfd   sp!, {r4-r12, pc}   ; restore saved registers; loading pc returns to the caller
    175 
    176     ENDP
    177 
    178 c80808080                       ; literal word read by "ldr r10, c80808080" in vp8_variance_halfpixvar16x16_h_armv6
    179     DCD     0x80808080          ; bit 7 set in each of the four bytes
    180 
    181     END                         ; marks the end of the source file for the assembler
    182 
    183