/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon
    .text

/*
 * Stack layout constants.
 *
 * The prologue pushes r4-r12 and lr (10 words, 40 bytes), then d8-d15
 * (64 bytes), then reserves a 16-byte scratch area: 0x78 bytes in total.
 * The first stack-passed argument word is therefore found at sp + 0x78.
 */
    .equ    ILM_SCRATCH_BYTES,  0x10    @ holds the saved r0-r3 of the current block
    .equ    ILM_STK_DX,         0x78    @ 1st stack word: low part of the case selector
    .equ    ILM_STK_DY,         0x7c    @ 2nd stack word: selector part scaled by 4
    .equ    ILM_STK_WIDTH,      0x80    @ 3rd stack word: horizontal byte count
    .equ    ILM_STK_HEIGHT,     0x84    @ 4th stack word: row count

/*
 * ilm_store_4x4 row0, row1, row2, row3
 *
 * Writes lane 0 (the low 32 bits) of four D registers to four consecutive
 * destination rows: r2 is the destination pointer and r3 the row stride.
 * Rows 0/1 go through r2 and rows 2/3 through r12 (= r2 + 2 * r3).
 * Finally r11 is pointed back at the scratch area so that the shared tail
 * can reload r0-r3 from it.
 * Clobbers r2, r12, r11.
 */
    .macro  ilm_store_4x4 row0, row1, row2, row3
    add             r12, r2, r3, lsl #1
    vst1.32         {\row0[0]}, [r2], r3
    vst1.32         {\row2[0]}, [r12], r3
    vst1.32         {\row1[0]}, [r2]
    vst1.32         {\row3[0]}, [r12]
    add             r11, sp, #0
    .endm

/*
 * ilm_ravg_rows a0, b0, a1, b1, a2, b2, a3, b3
 *
 * For each row pair (aN, bN): aN = rounding halving add of aN and bN,
 * on unsigned bytes.  Rows are issued in the order 0, 2, 1, 3.
 */
    .macro  ilm_ravg_rows a0, b0, a1, b1, a2, b2, a3, b3
    vrhadd.u8       \a0, \a0, \b0
    vrhadd.u8       \a2, \a2, \b2
    vrhadd.u8       \a1, \a1, \b1
    vrhadd.u8       \a3, \a3, \b3
    .endm

/*
 * ilm_narrow_rows q_row0, q_row1, q_row2, q_row3
 *
 * Narrows four Q registers of signed 16-bit values to unsigned bytes with
 * a rounding right shift by 5 and saturation, into d14, d16, d18, d20.
 * The issue order matters: some destinations alias halves of Q registers
 * that are used as sources by earlier lines of the same sequence.
 */
    .macro  ilm_narrow_rows q_row0, q_row1, q_row2, q_row3
    vqrshrun.s16    d14, \q_row0, #5
    vqrshrun.s16    d16, \q_row1, #5
    vqrshrun.s16    d18, \q_row2, #5
    vqrshrun.s16    d20, \q_row3, #5
    .endm

/*
 * ilm_shift_rows bytes
 *
 * Replaces d18, d20, d22, d24 with the 8 bytes that start "bytes" bytes
 * into the register pairs d18:d19, d20:d21, d22:d23, d24:d25.
 */
    .macro  ilm_shift_rows bytes
    vext.8          d18, d18, d19, #\bytes
    vext.8          d20, d20, d21, #\bytes
    vext.8          d22, d22, d23, #\bytes
    vext.8          d24, d24, d25, #\bytes
    .endm

/*
 * omxVCM4P10_InterpolateLuma
 *
 * Processes a rectangular area in 4x4 blocks, choosing one of sixteen
 * code paths from a selector computed once at entry.
 *
 * Register arguments, as used below:
 *   r0  source pointer           r1  source row stride
 *   r2  destination pointer      r3  destination row stride
 * Stack arguments, as used below:
 *   [ILM_STK_DX], [ILM_STK_DY]   selector = first + 4 * second
 *   [ILM_STK_WIDTH]              horizontal count, consumed 4 per block
 *   [ILM_STK_HEIGHT]             vertical count, consumed 4 per block row
 * NOTE(review): presumably these are dx, dy and the roi width/height of
 * the OpenMAX DL prototype, with dx and dy in 0..3 - confirm against the
 * header.  The selector is not range-checked here.
 *
 * Returns 0 in r0.  r4-r12 and d8-d15 are saved and restored.
 *
 * Internal register roles:
 *   r4   remaining rows          r5   remaining columns in this block row
 *   r6   selector                r8   copy of r0 kept across a helper call
 *   r11  address of the scratch area holding r0-r3 of the current block
 *   r12  second row pointer (row 2 / row 3)
 *   d30  16-bit lanes of 5,  d31  16-bit lanes of 20; not read in this
 *        function - presumably filter constants for the helpers.
 *
 * The armVCM4P10_InterpolateLuma_*_unsafe helpers are defined elsewhere.
 * Which registers they return results in is not visible here; the comments
 * only state which registers this function reads after each call.
 */
    .global omxVCM4P10_InterpolateLuma
    .func   omxVCM4P10_InterpolateLuma
omxVCM4P10_InterpolateLuma:
    push            {r4-r12, lr}
    vpush           {d8-d15}
    sub             sp, sp, #ILM_SCRATCH_BYTES
    ldr             r6, [sp, #ILM_STK_DX]
    ldr             r7, [sp, #ILM_STK_DY]
    ldr             r5, [sp, #ILM_STK_WIDTH]
    ldr             r4, [sp, #ILM_STK_HEIGHT]
    add             r6, r6, r7, lsl #2          @ selector = first + 4 * second
    add             r11, sp, #0
    vmov.i16        d31, #0x14
    vmov.i16        d30, #0x5

.Lilm_block:
    stm             r11, {r0-r3}                @ remember this block origin and strides
    /*
     * Computed branch.  In ARM state the pc operand reads as this
     * instruction + 8, so selector 0 lands on the SECOND branch below and
     * the first branch only fills the slot at + 4.
     */
    add             pc, pc, r6, lsl #2
    b               .Lilm_sel15                 @ filler slot
    b               .Lilm_sel0
    b               .Lilm_sel1
    b               .Lilm_sel2
    b               .Lilm_sel3
    b               .Lilm_sel4
    b               .Lilm_sel5
    b               .Lilm_sel6
    b               .Lilm_sel7
    b               .Lilm_sel8
    b               .Lilm_sel9
    b               .Lilm_sel10
    b               .Lilm_sel11
    b               .Lilm_sel12
    b               .Lilm_sel13
    b               .Lilm_sel14
    b               .Lilm_sel15

    /* Selector 0: plain copy of four source rows, 4 bytes stored per row. */
.Lilm_sel0:
    add             r12, r0, r1, lsl #1
    vld1.8          {d9}, [r0], r1
    vld1.8          {d11}, [r12], r1
    vld1.8          {d10}, [r0]
    vld1.8          {d12}, [r12]
    ilm_store_4x4   d9, d10, d11, d12
    b               .Lilm_advance

    /* Selector 1: horizontal helper at src - 2; d22/d24/d26/d28 averaged
       with d14/d16/d18/d20. */
.Lilm_sel1:
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ilm_ravg_rows   d22, d14, d24, d16, d26, d18, d28, d20
    ilm_store_4x4   d22, d24, d26, d28
    b               .Lilm_advance

    /* Selector 2: horizontal helper at src - 2; d22/d24/d26/d28 stored as is. */
.Lilm_sel2:
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ilm_store_4x4   d22, d24, d26, d28
    b               .Lilm_advance

    /* Selector 3: horizontal helper at src - 2; d22/d24/d26/d28 averaged
       with d15/d17/d19/d21. */
.Lilm_sel3:
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ilm_ravg_rows   d22, d15, d24, d17, d26, d19, d28, d21
    ilm_store_4x4   d22, d24, d26, d28
    b               .Lilm_advance

    /* Selector 4: vertical helper two rows above src; d0/d2/d4/d6 averaged
       with d9/d10/d11/d12. */
.Lilm_sel4:
    sub             r0, r0, r1, lsl #1
    bl              armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ilm_ravg_rows   d0, d9, d2, d10, d4, d11, d6, d12
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 5: vertical helper two rows above src, then horizontal
       helper at src - 2; the two results are averaged. */
.Lilm_sel5:
    mov             r8, r0
    sub             r0, r0, r1, lsl #1
    bl              armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    sub             r0, r8, #2
    bl              armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ilm_ravg_rows   d22, d0, d24, d2, d26, d4, d28, d6
    ilm_store_4x4   d22, d24, d26, d28
    b               .Lilm_advance

    /* Selector 6: diagonal (hor-ver) helper at src - 2 rows - 2 bytes;
       q7..q10 narrowed and averaged into d0/d2/d4/d6. */
.Lilm_sel6:
    sub             r0, r0, r1, lsl #1
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ilm_narrow_rows q7, q8, q9, q10
    ilm_ravg_rows   d0, d14, d2, d16, d4, d18, d6, d20
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 7: vertical helper at src + 1 byte, two rows up, then
       horizontal helper at src - 2; the two results are averaged. */
.Lilm_sel7:
    mov             r8, r0
    add             r0, r0, #1
    sub             r0, r0, r1, lsl #1
    bl              armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    sub             r0, r8, #2
    bl              armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ilm_ravg_rows   d22, d0, d24, d2, d26, d4, d28, d6
    ilm_store_4x4   d22, d24, d26, d28
    b               .Lilm_advance

    /* Selector 8: vertical helper two rows above src; d0/d2/d4/d6 stored as is. */
.Lilm_sel8:
    sub             r0, r0, r1, lsl #1
    bl              armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 9: diagonal (ver-hor) helper; d18..d25 re-aligned by 4
       bytes, narrowed, and averaged into d0/d2/d4/d6. */
.Lilm_sel9:
    sub             r0, r0, r1, lsl #1
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    ilm_shift_rows  4
    ilm_narrow_rows q9, q10, q11, q12
    ilm_ravg_rows   d0, d14, d2, d16, d4, d18, d6, d20
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 10: diagonal (hor-ver) helper; d0/d2/d4/d6 stored as is. */
.Lilm_sel10:
    sub             r0, r0, r1, lsl #1
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 11: as selector 9, but the re-alignment is 6 bytes. */
.Lilm_sel11:
    sub             r0, r0, r1, lsl #1
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    ilm_shift_rows  6
    ilm_narrow_rows q9, q10, q11, q12
    ilm_ravg_rows   d0, d14, d2, d16, d4, d18, d6, d20
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 12: vertical helper two rows above src; d0/d2/d4/d6 averaged
       with d10/d11/d12/d13. */
.Lilm_sel12:
    sub             r0, r0, r1, lsl #1
    bl              armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ilm_ravg_rows   d0, d10, d2, d11, d4, d12, d6, d13
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 13: vertical helper two rows above src, then horizontal
       helper one row below src at - 2 bytes; the results are averaged. */
.Lilm_sel13:
    mov             r8, r0
    sub             r0, r0, r1, lsl #1
    bl              armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    add             r0, r8, r1
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ilm_ravg_rows   d22, d0, d24, d2, d26, d4, d28, d6
    ilm_store_4x4   d22, d24, d26, d28
    b               .Lilm_advance

    /* Selector 14: diagonal (hor-ver) helper; q8..q11 narrowed and averaged
       into d0/d2/d4/d6. */
.Lilm_sel14:
    sub             r0, r0, r1, lsl #1
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ilm_narrow_rows q8, q9, q10, q11
    ilm_ravg_rows   d0, d14, d2, d16, d4, d18, d6, d20
    ilm_store_4x4   d0, d2, d4, d6
    b               .Lilm_advance

    /* Selector 15: vertical helper at src + 1 byte, two rows up, then
       horizontal helper one row below src at - 2 bytes; the results are
       averaged.  Falls through into the shared tail. */
.Lilm_sel15:
    mov             r8, r0
    add             r0, r0, #1
    sub             r0, r0, r1, lsl #1
    bl              armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    add             r0, r8, r1
    sub             r0, r0, #2
    bl              armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ilm_ravg_rows   d22, d0, d24, d2, d26, d4, d28, d6
    ilm_store_4x4   d22, d24, d26, d28

    /*
     * Shared tail: restore the block origin, step 4 bytes to the right and
     * loop while columns remain; otherwise step down 4 rows, rewind by the
     * full width and loop while rows remain.
     */
.Lilm_advance:
    ldm             r11, {r0-r3}
    subs            r5, r5, #4
    add             r0, r0, #4
    add             r2, r2, #4
    bgt             .Lilm_block
    subs            r4, r4, #4                  @ flags consumed by the bgt below
    ldr             r5, [sp, #ILM_STK_WIDTH]    @ reload the per-row column count
    add             r11, sp, #0
    add             r0, r0, r1, lsl #2
    add             r2, r2, r3, lsl #2
    sub             r0, r0, r5
    sub             r2, r2, r5
    bgt             .Lilm_block                 @ nothing since the subs altered the flags
    mov             r0, #0                      @ return value
    add             sp, sp, #ILM_SCRATCH_BYTES
    vpop            {d8-d15}
    pop             {r4-r12, pc}
    .endfunc

    .end
