@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@/**
@******************************************************************************
@*
@* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC)
@*  and do the prediction.
@*
@* @par Description
@*  This function evaluates the first three 16x16 modes, computes the
@*  corresponding SAD of each, and returns the buffer predicted with the
@*  best mode.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] pu1_ngbr_pels_i16
@*  UWORD8 pointer to neighbouring pels
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] u4_n_avblty
@*  availability of neighbouring pixels
@*  (bit 0 is tested for "left", bit 2 for "top")
@*
@* @param[out] u4_intra_mode
@*  Pointer to the variable in which best mode is returned
@*  (0 = vertical, 1 = horizontal, 2 = DC)
@*
@* @param[out] pu4_sadmin
@*  Pointer to the variable in which minimum sad is returned
@*
@* @param[in] u4_valid_intra_modes
@*  Says what all modes are valid
@*  (bit 0 = vertical, bit 1 = horizontal, bit 2 = DC)
@*
@*
@* @return      none
@*
@******************************************************************************
@*/
@
@void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
@                                      UWORD8 *pu1_ngbr_pels_i16,
@                                      UWORD8 *pu1_dst,
@                                      UWORD32 src_strd,
@                                      UWORD32 dst_strd,
@                                      WORD32 u4_n_avblty,
@                                      UWORD32 *u4_intra_mode,
@                                      WORD32 *pu4_sadmin,
@                                      UWORD32 u4_valid_intra_modes)
@
.text
.p2align 2

    .global ih264e_evaluate_intra16x16_modes_a9q
    @ Mark the symbol as a function so that ELF linkers and tools treat it as
    @ code (needed for ARM/Thumb interworking decisions and symbol sizing).
    .type   ih264e_evaluate_intra16x16_modes_a9q, %function

@------------------------------------------------------------------------------
@ Purpose: compute the SAD of a 16x16 source block against the vertical,
@ horizontal and DC predictions built from the neighbour buffer, pick the
@ smallest, store its SAD and mode index, and write that prediction to pu1_dst.
@
@ Arguments on entry:
@   r0        = pu1_src
@   r1        = pu1_ngbr_pels_i16
@   r2        = pu1_dst
@   r3        = src_strd
@   [sp, #0]  = dst_strd
@   [sp, #4]  = u4_n_avblty
@   [sp, #8]  = u4_intra_mode
@   [sp, #12] = pu4_sadmin
@   [sp, #16] = u4_valid_intra_modes
@
@ Neighbour buffer as read by this code:
@   bytes  0..15 : used as the left column; byte 15 is used for row 0 and the
@                  pointer walks down to byte 0 for row 15
@   byte   16    : skipped
@   bytes 17..32 : used as the top row
@
@ NOTE(review): the DC sum below always adds all 32 neighbour bytes and only
@ the rounding/shift depends on u4_n_avblty, so this presumably expects the
@ caller to have zeroed the bytes of unavailable neighbours - confirm against
@ the caller.
@
@ Selection order (signed compares): vertical is kept when vert <= horz and
@ vert <= dc; otherwise horizontal when horz <= dc; otherwise DC. A mode whose
@ valid bit is clear takes part with a SAD of 1 << 30.
@------------------------------------------------------------------------------
ih264e_evaluate_intra16x16_modes_a9q:

    stmfd         sp!, {r4-r12, r14}    @ save core registers (10 words = 40 bytes)
    ldr           r5, [sp, #44]         @ r5 = u4_n_avblty (40 saved bytes + 4)

    vpush         {d8-d15}              @ save d8-d15 (64 bytes); stack args now at sp + 104 onward
    vld1.32       {q4}, [r1]!           @ q4 = ngbr[0..15] (left column), r1 += 16
    sub           r6, r1, #1            @ r6 -> ngbr[15], left pel used for row 0
    add           r1, r1, #1            @ step over ngbr[16]
    mov           r10, #0               @ r10 = left-available flag
    vld1.32       {q5}, [r1]!           @ q5 = ngbr[17..32] (top row)
    mov           r11, #0               @ r11 = top-available flag
    mov           r4, #0                @ r4 = DC bias, 0 unless nothing is available

    @ Left available?
    ands          r7, r5, #01
    movne         r10, #1

    @ Top available?
    ands          r8, r5, #04
    lsl           r9, r10, #3           @ 8 if left is available (lsl leaves flags alone)
    movne         r11, #1               @ still conditional on the "top" test above
    lsl           r12, r11, #3          @ 8 if top is available
    adds          r8, r9, r12           @ r8 = rounding term: 0, 8 or 16

    @ Nothing available: DC value becomes 128
    moveq         r4, #128

    @ Sum of all 32 neighbour bytes
    vaddl.u8      q15, d8, d9           @ left: 8 x u16 partial sums
    vaddl.u8      q14, d10, d11         @ top:  8 x u16 partial sums
    vadd.u16      q15, q14, q15
    vadd.u16      d30, d31, d30
    vpadd.u16     d30, d30
    vpadd.u16     d30, d30              @ every u16 lane of d30 = total

    vmov.u16      r7, d30[0]
    add           r7, r7, r8            @ add rounding term
    add           r11, r11, #3
    add           r8, r10, r11          @ shift = 3 + left + top

    lsr           r7, r8
    add           r7, r4, r7            @ + 128 when no neighbour is available
    vld1.32       {q0}, [r0], r3        @ source row 0
    vdup.8        q15, r7               @ q15 = DC value in every byte

    @ SADs for all three modes, row 0 (vabdl initialises the accumulators)
    ldrb          r7, [r6]
    vdup.8        q10, r7               @ horizontal prediction for row 0
    vabdl.u8      q8, d0, d10           @ vertical, low half
    vabdl.u8      q9, d1, d11           @ vertical, high half
    sub           r6, r6, #1            @ next left pel (one byte lower)
    vabdl.u8      q13, d0, d20          @ horizontal, low half
    vabdl.u8      q14, d1, d21          @ horizontal, high half
    mov           r1, #15               @ remaining rows
    vabdl.u8      q11, d0, d30          @ DC, low half
    vabdl.u8      q12, d1, d31          @ DC, high half

.Lintra16_sad_loop:
    vld1.32       {q1}, [r0], r3        @ source row i
    vabal.u8      q11, d2, d30          @ DC
    ldrb          r7, [r6]
    vabal.u8      q12, d3, d31          @ DC

    vabal.u8      q8, d2, d10           @ vertical
    vdup.8        q10, r7               @ horizontal prediction for row i
    sub           r6, r6, #1
    vabal.u8      q9, d3, d11           @ vertical

    subs          r1, r1, #1
    vabal.u8      q13, d2, d20          @ horizontal
    vabal.u8      q14, d3, d21          @ horizontal
    bne           .Lintra16_sad_loop

    @ Reduce the three sets of accumulators to scalars
    vadd.i16      q9, q9, q8            @ vert
    vadd.i16      d18, d19, d18         @ vert
    vpaddl.u16    d18, d18              @ vert
    vadd.i16      q14, q13, q14         @ horz
    vadd.i16      d28, d29, d28         @ horz
    vpaddl.u32    d18, d18              @ vert
    vpaddl.u16    d28, d28              @ horz

    vpaddl.u32    d28, d28              @ horz
    vmov.u32      r8, d18[0]            @ r8 = vertical SAD
    vadd.i16      q12, q11, q12         @ dc
    vmov.u32      r9, d28[0]            @ r9 = horizontal SAD
    mov           r11, #1
    vadd.i16      d24, d24, d25         @ dc
    lsl           r11, #30              @ r11 = 1 << 30, SAD given to disallowed modes

    ldr           r0, [sp, #120]        @ r0 = u4_valid_intra_modes
    ands          r7, r0, #01           @ vertical allowed?
    moveq         r8, r11
    vpaddl.u16    d24, d24              @ dc

    ands          r6, r0, #02           @ horizontal allowed?
    moveq         r9, r11
    vpaddl.u32    d24, d24              @ dc

    vmov.u32      r10, d24[0]           @ r10 = DC SAD
    ldr           r4, [sp, #104]        @ r4 = dst_strd
    ldr           r7, [sp, #116]        @ r7 = pu4_sadmin
    ands          r6, r0, #04           @ DC allowed?
    moveq         r10, r11

    ldr           r6, [sp, #112]        @ r6 = u4_intra_mode

    cmp           r8, r9
    bgt           .Lintra16_not_vert
    cmp           r8, r10
    bgt           .Lintra16_do_dc

    @ Vertical prediction: every output row is the top row
    str           r8, [r7]              @ *pu4_sadmin = vertical SAD
    mov           r8, #0
    str           r8, [r6]              @ *u4_intra_mode = 0
    vmov          q15, q5               @ reuse the 16-row store sequence below

    b             .Lintra16_store_rows

.Lintra16_not_vert:
    cmp           r9, r10
    bgt           .Lintra16_do_dc

    @ Horizontal prediction: row i is filled with left pel ngbr[15 - i].
    @ Broadcasts and stores are interleaved; q5-q7 (callee-saved) were
    @ spilled by the vpush above.
    vdup.8        q5, d9[7]             @ row 0
    str           r9, [r7]              @ *pu4_sadmin = horizontal SAD
    vdup.8        q6, d9[6]             @ row 1
    mov           r9, #1
    vdup.8        q7, d9[5]             @ row 2
    vst1.32       {d10, d11}, [r2], r4  @ store row 0
    vdup.8        q8, d9[4]             @ row 3
    str           r9, [r6]              @ *u4_intra_mode = 1
    vdup.8        q9, d9[3]             @ row 4
    vst1.32       {d12, d13}, [r2], r4  @ store row 1
    vdup.8        q10, d9[2]            @ row 5
    vst1.32       {d14, d15}, [r2], r4  @ store row 2
    vdup.8        q11, d9[1]            @ row 6
    vst1.32       {d16, d17}, [r2], r4  @ store row 3
    vdup.8        q12, d9[0]            @ row 7
    vst1.32       {d18, d19}, [r2], r4  @ store row 4
    vdup.8        q13, d8[7]            @ row 8
    vst1.32       {d20, d21}, [r2], r4  @ store row 5
    vdup.8        q14, d8[6]            @ row 9
    vst1.32       {d22, d23}, [r2], r4  @ store row 6
    vdup.8        q15, d8[5]            @ row 10
    vst1.32       {d24, d25}, [r2], r4  @ store row 7
    vdup.8        q1, d8[4]             @ row 11
    vst1.32       {d26, d27}, [r2], r4  @ store row 8
    vdup.8        q2, d8[3]             @ row 12
    vst1.32       {d28, d29}, [r2], r4  @ store row 9
    vdup.8        q3, d8[2]             @ row 13
    vst1.32       {d30, d31}, [r2], r4  @ store row 10
    vdup.8        q5, d8[1]             @ row 14 (q5 is free again)
    vst1.32       {d2, d3}, [r2], r4    @ store row 11
    vdup.8        q6, d8[0]             @ row 15 (q6 is free again)
    vst1.32       {d4, d5}, [r2], r4    @ store row 12

    vst1.32       {d6, d7}, [r2], r4    @ store row 13

    vst1.32       {d10, d11}, [r2], r4  @ store row 14

    vst1.32       {d12, d13}, [r2], r4  @ store row 15
    b             .Lintra16_end

.Lintra16_do_dc:
    @ DC prediction: q15 already holds the DC value in every byte
    str           r10, [r7]             @ *pu4_sadmin = DC SAD
    mov           r10, #2
    str           r10, [r6]             @ *u4_intra_mode = 2

.Lintra16_store_rows:
    @ Shared by DC and vertical: write q15 to all 16 destination rows
    vst1.32       {d30, d31}, [r2], r4  @ row 0
    vst1.32       {d30, d31}, [r2], r4  @ row 1
    vst1.32       {d30, d31}, [r2], r4  @ row 2
    vst1.32       {d30, d31}, [r2], r4  @ row 3
    vst1.32       {d30, d31}, [r2], r4  @ row 4
    vst1.32       {d30, d31}, [r2], r4  @ row 5
    vst1.32       {d30, d31}, [r2], r4  @ row 6
    vst1.32       {d30, d31}, [r2], r4  @ row 7
    vst1.32       {d30, d31}, [r2], r4  @ row 8
    vst1.32       {d30, d31}, [r2], r4  @ row 9
    vst1.32       {d30, d31}, [r2], r4  @ row 10
    vst1.32       {d30, d31}, [r2], r4  @ row 11
    vst1.32       {d30, d31}, [r2], r4  @ row 12
    vst1.32       {d30, d31}, [r2], r4  @ row 13
    vst1.32       {d30, d31}, [r2], r4  @ row 14
    vst1.32       {d30, d31}, [r2], r4  @ row 15

.Lintra16_end:
    vpop          {d8-d15}
    ldmfd         sp!, {r4-r12, pc}     @ restore registers and return

    .size   ih264e_evaluate_intra16x16_modes_a9q, . - ih264e_evaluate_intra16x16_modes_a9q