;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_variance_halfpixvar16x16_h_armv6|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; unsigned int vp8_variance_halfpixvar16x16_h_armv6(
;         unsigned char *src_ptr, int source_stride,
;         unsigned char *ref_ptr, int recon_stride,
;         unsigned int  *sse)
;
; Variance of a 16x16 block whose source pixels are first interpolated
; horizontally at the half-pixel position: each predicted pixel is the
; rounded average of src[x] and src[x + 1].
;
; In:
;   r0      unsigned char *src_ptr
;   r1      int            source_stride
;   r2      unsigned char *ref_ptr
;   r3      int            recon_stride
;   stack   unsigned int  *sse      (at [sp, #40] once 10 regs are pushed)
;
; Out:
;   r0      sse - ((sum * sum) >> 8), the product taken as unsigned
;   *sse    sum of squared differences over the 256 pixels
;
; Register roles inside the loop:
;   r4-r7   scratch
;   r8      sum   (signed running sum of differences)
;   r10     0x80808080 (per-byte rounding/bias constant)
;   r11     sse   (running sum of squared differences)
;   r12     rows remaining
;   lr      constant zero
;
; Notes:
;   * Each row reads 17 source bytes (word loads at offsets 0..13).
;   * The source is loaded with word loads at byte offsets n and n + 1,
;     so at least one of every pair is not word aligned; the routine
;     relies on the target permitting unaligned LDR.
;     NOTE(review): confirm unaligned access is enabled on all targets.
;-----------------------------------------------------------------------
|vp8_variance_halfpixvar16x16_h_armv6| PROC

    stmfd   sp!, {r4-r12, lr}   ; 10 registers = 40 bytes
    mov     r8, #0              ; initialize sum = 0
    ldr     r10, c80808080
    mov     r11, #0             ; initialize sse = 0
    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     lr, #0              ; constant zero
loop
    ; 1st 4 pixels
    ldr     r4, [r0, #0]        ; load 4 src pixels
    ldr     r6, [r0, #1]        ; load 4 src pixels with 1 byte offset
    ldr     r5, [r2, #0]        ; load 4 ref pixels

    ; bilinear interpolation: per byte, (a + b + 1) >> 1.
    ; a - ~b = a + b - 255; the halving subtract yields
    ; ((a + b + 1) >> 1) - 128 modulo 256, and flipping bit 7 of every
    ; byte adds the 128 back.
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    ; usub8 sets the per-byte GE flags where no borrow occurred; sel then
    ; keeps those bytes and substitutes zero (lr) for the others.
    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum (NZCV flags are not consumed, so no S suffix)
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 2nd 4 pixels
    ldr     r4, [r0, #4]        ; load 4 src pixels
    ldr     r6, [r0, #5]        ; load 4 src pixels with 1 byte offset
    ldr     r5, [r2, #4]        ; load 4 ref pixels

    ; bilinear interpolation
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    ; second half of the previous group's sse, issued here between the
    ; interpolation and the difference computation
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 3rd 4 pixels
    ldr     r4, [r0, #8]        ; load 4 src pixels
    ldr     r6, [r0, #9]        ; load 4 src pixels with 1 byte offset
    ldr     r5, [r2, #8]        ; load 4 ref pixels

    ; bilinear interpolation
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 4th 4 pixels
    ldr     r4, [r0, #12]       ; load 4 src pixels
    ldr     r6, [r0, #13]       ; load 4 src pixels with 1 byte offset
    ldr     r5, [r2, #12]       ; load 4 ref pixels

    ; bilinear interpolation
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    ; All loads for this row are done, so the row pointers are advanced
    ; here, interleaved with the difference computation.
    usub8   r6, r4, r5          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    subs    r12, r12, #1        ; one row done

    bne     loop

    ; return stuff
    ldr     r6, [sp, #40]       ; get address of sse (5th arg, above saved regs)
    mul     r0, r8, r8          ; sum * sum (low 32 bits)
    str     r11, [r6]           ; store sse
    ; |sum| can reach 16 * 16 * 255 = 65280, so sum * sum can reach
    ; 0xFE010000, which has bit 31 set. The product must be shifted as an
    ; unsigned value (lsr); an arithmetic shift would sign-extend it and
    ; corrupt the result.
    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}

    ENDP

; 0x80 in every byte lane: used by the interpolation step above
c80808080
    DCD     0x80808080

    END