; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version horizontal part of
;--            h264bsdInterpolateMid functions
;--
;-------------------------------------------------------------------------------


    IF :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE


;// Register allocation

ref     RN 0    ;// pointer to current position in reference image
mb      RN 1    ;// pointer to current position in interpolated mb
count   RN 2    ;// bit-packed width and count values

;// Unpacked samples, two per register as 16-bit lanes [hi, lo].
;// The names give the sample indices held at the top of loop_x; the roles
;// rotate by four samples in the second (unrolled) half of the loop.
x_2_0   RN 4
x_3_1   RN 5
x_6_4   RN 6
x_7_5   RN 7

;// Accumulators for the four results produced per pass (y_0 .. y_3).
;// tmp3 doubles as scratch for the extracted [19:16] field of count.
tmp1    RN 8
tmp2    RN 9
tmp3    RN 10
tmp4    RN 11

mult_20_01  RN 12   ;// [20,  1]
mult_20_m5  RN 14   ;// [20, -5]  (lr is reused; it is saved/restored below)


    EXPORT  h264bsdInterpolateMidHorPart

;// Horizontal filter approach
;//
;// Basic idea in horizontal filtering is to adjust coefficients
;// like below. Calculation is done with 16-bit maths.
;//
;// Reg       x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
;//         [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
;// y_0 =     20  1     20 -5        -5         1
;// y_1 =     -5        20  1      1 20        -5
;// y_2 =      1        -5        -5 20      1 20
;// y_3 =                1        20 -5     -5 20         1
;//
;// i.e. every output is the 6-tap sum
;//   y_n = s[n] - 5*s[n+1] + 20*s[n+2] + 20*s[n+3] - 5*s[n+4] + s[n+5]

;//-----------------------------------------------------------------------------
;// h264bsdInterpolateMidHorPart
;//
;// Runs the 6-tap filter above along each row and stores every sum as a full
;// 32-bit word. No rounding, shift or clipping is applied here.
;//
;// In:
;//   r0 (ref)   byte pointer into the reference image. The first two words of
;//              a row are read from [ref-8] and [ref-4]; later words from
;//              [ref] with post-increment.
;//              NOTE(review): sample numbering in the comments assumes
;//              little-endian word loads (byte 0 = leftmost sample) - confirm.
;//   r1 (mb)    output pointer; advanced by 4 bytes per stored result.
;//   r2 (count) packed control word, see layout in the routine body.
;//
;// Out:  nothing is returned in a register by this code.
;// Regs: r4-r11 and lr are saved and restored; r12 is used without saving;
;//       r0-r2 are modified; flags are modified.
;// NOTE(review): nine registers (36 bytes) are pushed, so sp is only 4-byte
;//       aligned inside the routine although PRESERVE8 is declared. The
;//       routine makes no calls.
;//-----------------------------------------------------------------------------

h264bsdInterpolateMidHorPart
    STMFD   sp!, {r4-r11, lr}

    ;// Layout of the count register as the code below uses it:
    ;// [31:28] loop_x  running x counter, reloaded from [19:16] for each row.
    ;//                 It is reloaded with ADD, so it presumably has to be
    ;//                 zero on entry - confirm against the caller.
    ;// [27:20] loop_y  row counter, decremented once per row; the routine
    ;//                 processes (value + 1) rows
    ;// [19:16] partWidth-1
    ;// [15:00] width   (added to ref once per row as a signed 16-bit value)


    LDR     mult_20_01, = 0x00140001    ;// lanes [20,  1]
    LDR     mult_20_m5, = 0x0014FFFB    ;// lanes [20, -5]
    AND     tmp3, count, #0x000F0000    ;// partWidth-1
loop_y
    LDR     x_3_1, [ref, #-8]           ;// raw bytes of samples 0..3
    ADD     count, count, tmp3, LSL #12 ;// x counter: [19:16] -> [31:28]
    LDR     x_7_5, [ref, #-4]           ;// raw bytes of samples 4..7
    UXTB16  x_2_0, x_3_1                ;// bytes 2,0 -> lanes [s2, s0]
    UXTB16  x_3_1, x_3_1, ROR #8        ;// bytes 3,1 -> lanes [s3, s1]
    UXTB16  x_6_4, x_7_5                ;// bytes 2,0 -> lanes [s6, s4]

loop_x
    ;// x_7_5 still holds raw bytes here, both on entry from loop_y and when
    ;// looping back from the second half; unpack lanes [s7, s5].
    UXTB16  x_7_5, x_7_5, ROR #8

    ;// ---- first half: results y_0..y_3 from x_2_0, x_3_1, x_6_4, x_7_5 ----
    SMUAD   tmp1, x_2_0, mult_20_01         ;// y_0  = 20*s2 + s0
    SMULTB  tmp2, x_2_0, mult_20_m5         ;// y_1  = -5*s2
    SMULTB  tmp3, x_2_0, mult_20_01         ;// y_2  = s2
    SMULTB  tmp4, x_3_1, mult_20_01         ;// y_3  = s3

    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1   ;// y_0 += 20*s3 - 5*s1
    SMLAD   tmp2, x_3_1, mult_20_01, tmp2   ;// y_1 += 20*s3 + s1
    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3   ;// y_2 += -5*s3
    LDR     x_3_1, [ref], #4                ;// old lanes are dead: fetch raw s8..s11
    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4   ;// y_3 += 20*s6 - 5*s4

    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1   ;// y_0 += -5*s4
    SMLADX  tmp2, x_6_4, mult_20_01, tmp2   ;// y_1 += s6 + 20*s4
    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3   ;// y_2 += -5*s6 + 20*s4
    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4   ;// y_3 += -5*s7 + 20*s5

    SMLABB  tmp1, x_7_5, mult_20_01, tmp1   ;// y_0 += s5
    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2   ;// y_1 += -5*s5
    UXTB16  x_2_0, x_3_1                    ;// new lanes [s10, s8]
    SMLADX  tmp3, x_7_5, mult_20_01, tmp3   ;// y_2 += s7 + 20*s5
    SMLABB  tmp4, x_2_0, mult_20_01, tmp4   ;// y_3 += s8

    ;// Four results done: take 4 off the x counter. The STRs leave the flags
    ;// alone, so BCC still sees the SUBS result; carry clear (borrow) means
    ;// the counter was below 4 and the row is finished.
    SUBS    count, count, #4<<28
    STR     tmp1, [mb], #4
    STR     tmp2, [mb], #4
    STR     tmp3, [mb], #4
    STR     tmp4, [mb], #4
    BCC     next_y

    ;// ---- second half: same filter, register roles rotated by 4 samples ----
    ;// x_6_4/x_7_5 now act as the first pair, x_2_0/x_3_1 as the second.
    UXTB16  x_3_1, x_3_1, ROR #8            ;// finish unpacking the word loaded above

    SMUAD   tmp1, x_6_4, mult_20_01
    SMULTB  tmp2, x_6_4, mult_20_m5
    SMULTB  tmp3, x_6_4, mult_20_01
    SMULTB  tmp4, x_7_5, mult_20_01

    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1
    SMLAD   tmp2, x_7_5, mult_20_01, tmp2
    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3
    LDR     x_7_5, [ref], #4                ;// old lanes are dead: fetch next raw word
    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4

    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1
    SMLADX  tmp2, x_2_0, mult_20_01, tmp2
    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3
    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4

    SMLABB  tmp1, x_3_1, mult_20_01, tmp1
    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2
    UXTB16  x_6_4, x_7_5                    ;// even bytes of the new word
    SMLADX  tmp3, x_3_1, mult_20_01, tmp3
    SMLABB  tmp4, x_6_4, mult_20_01, tmp4

    SUBS    count, count, #4<<28            ;// x counter -= 4, as above
    STR     tmp1, [mb], #4
    STR     tmp2, [mb], #4
    STR     tmp3, [mb], #4
    STR     tmp4, [mb], #4
    BCS     loop_x                          ;// no borrow: more samples in this row

next_y
    ;// Carry is clear on both paths into here (BCC taken / BCS not taken) and
    ;// neither AND nor SMLABB changes it, so the SBC below subtracts one extra.
    AND     tmp3, count, #0x000F0000    ;// partWidth-1
    SMLABB  ref, count, mult_20_01, ref ;// +width
    SBC     ref, ref, tmp3, LSR #16     ;// -(partWidth-1)-1
    ;// +(1<<28) wraps the underflowed x field back to 0 (it is 0xF here when
    ;// partWidth is a multiple of 4); -(1<<20) decrements the row counter.
    ;// The result turns negative once the row counter borrows.
    ADDS    count, count, #(1<<28)-(1<<20)
    BGE     loop_y

    LDMFD   sp!, {r4-r11, pc}

    END
