;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Define the processor variants supported by this file

        M_VARIANTS CortexA8

;//-------------------------------------------------------
;// This table implements the C "switch" on predMode in asm
;// by the method of two levels of indexing: entry N holds
;// the address of the handler for predMode == N, and the
;// function loads pc straight from the selected entry.
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pSwitchTable4x4
        DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR
        DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL
        DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR
        DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL
        DCD  OMX_VC_4x4_HU


        IF CortexA8

;//--------------------------------------------
;// Scratch variable
;//--------------------------------------------
return          RN 0        ;// shares r0 with pSrcLeft: only written once pSrcLeft is dead
pTable          RN 8
pc              RN 15

;//--------------------------------------------
;// Declare input registers
;//--------------------------------------------
pSrcLeft        RN 0    ;// input pointer
pSrcAbove       RN 1    ;// input pointer
pSrcAboveLeft   RN 2    ;// input pointer
pDst            RN 3    ;// output pointer
leftStep        RN 4    ;// input variable
dstStep         RN 5    ;// input variable
predMode        RN 6    ;// input variable
availability    RN 7    ;// input variable

;// Row pointers used by OMX_VC_4x4_DIAG_DR. They reuse the registers of
;// pSrcAbove, leftStep and predMode, so they may only be written after the
;// last read of those inputs.
pDst1           RN 1
pDst2           RN 4
pDst3           RN 6

pSrcTmp         RN 9
srcStep         RN 10
pDstTmp         RN 11
dstep           RN 12

;//-------------------
;// Neon registers
;//-------------------
;// NOTE: the names below are per-mode views of a small set of D registers;
;// many of them alias the same physical register with a different datatype.

;// OMX_VC_4x4_VERT
dAboveU32       DN  D0.U32

;// OMX_VC_4x4_HOR
dLeftVal0       DN  D0.8
dLeftVal1       DN  D1.8
dLeftVal2       DN  D2.8
dLeftVal3       DN  D3.8
dLeftVal0U32    DN  D0.U32
dLeftVal1U32    DN  D1.U32
dLeftVal2U32    DN  D2.U32
dLeftVal3U32    DN  D3.U32

;// OMX_VC_4x4_DC
dLeftVal        DN  D0.U8
dLeftValU32     DN  D0.U32
dSumAboveLeftU16  DN  D1.U16
dSumAboveLeftU32  DN  D1.U32
dSumAboveLeftU64  DN  D1.U64
dSumAboveLeftU8   DN  D1.U8
dSum            DN  D0.U8

dSumLeftValU16  DN  D1.U16
dSumLeftValU32  DN  D1.U32
dSumLeftValU64  DN  D1.U64
dSumLeftValU8   DN  D1.U8

dAboveVal       DN  D0.U8
dSumAboveValU16 DN  D1.U16
dSumAboveValU32 DN  D1.U32
dSumAboveValU64 DN  D1.U64
dSumAboveValU8  DN  D1.U8
dConst128U8     DN  D0.U8


;//OMX_VC_4x4_DIAG_DL

dAbove          DN  D0.U8
dU7             DN  D2.U8
dU3             DN  D2.U8
dAbove0         DN  D3.U8
dAbove1         DN  D4.U8
dAbove2         DN  D5.U8
dTmp            DN  D6.U8
dTmp0           DN  D7.U8
dTmp1           DN  D8.U8
dTmp2           DN  D9.U8
dTmp3           DN  D10.U8
dTmpU32         DN  D6.U32


;//OMX_VC_4x4_DIAG_DR
dLeft           DN  D1.U8
dUL             DN  D2.U8

;//OMX_VC_4x4_VR
dLeft0          DN  D1.U8
dLeft1          DN  D2.U8
dEven0          DN  D3.U8
dEven1          DN  D4.U8
dEven2          DN  D5.U8
dOdd0           DN  D6.U8
dOdd1           DN  D11.U8
dOdd2           DN  D12.U8
dTmp3U32        DN  D10.U32
dTmp2U32        DN  D9.U32


;//OMX_VC_4x4_HD
dTmp1U64        DN  D8.U64
dTmp0U64        DN  D7.U64
dTmpU64         DN  D6.U64
dTmpU32         DN  D6.U32
dTmp1U32        DN  D8.U32

;//OMX_VC_4x4_HU
dL3             DN  D2.U8
dLeftHU0        DN  D3.U8
dLeftHU1        DN  D4.U8
dLeftHU2        DN  D5.U8
dTmp0U32        DN  D7.U32




;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 starts
;//-----------------------------------------------------------------------------------------------
;//
;// Writes a 4x4 block of predicted bytes to pDst, selecting one of nine
;// prediction modes through armVCM4P10_pSwitchTable4x4.
;//
;// Register arguments:
;//   r0  pSrcLeft       left neighbours, one byte every leftStep bytes
;//   r1  pSrcAbove      above neighbours (4 bytes; 8 bytes are read by DIAG_DL
;//                      and VL when OMX_VC_UPPER_RIGHT is set in availability)
;//   r2  pSrcAboveLeft  above-left neighbour (1 byte; read by DIAG_DR, VR, HD)
;//   r3  pDst           destination, one 4-byte row every dstStep bytes
;// Stack arguments (declared with M_ARG, in this order):
;//   LeftStep, DstStep, PredMode, Availability
;//
;// Returns:
;//   r0 = OMX_Sts_NoErr on every path (set at ExitPredict4x4).
;//
;// NOTE(review): predMode is used unchecked as an index into the 9-entry
;// table; a value outside 0..8 loads pc from beyond the table. Callers must
;// pass a valid mode.
;// NOTE(review): "r12,d12" on M_START presumably makes the prologue/epilogue
;// preserve r4-r12 and d8-d12 (macro is defined in armCOMM_s.h) - confirm.
;//-----------------------------------------------------------------------------------------------

        ;// Write function header
        M_START omxVCM4P10_PredictIntra_4x4, r12,d12

        ;// Define stack arguments
        M_ARG    LeftStep,     4
        M_ARG    DstStep,      4
        M_ARG    PredMode,     4
        M_ARG    Availability, 4


        LDR      pTable,=armVCM4P10_pSwitchTable4x4     ;// Load index table for switch case

        ;// Load argument from the stack
        M_LDRD   predMode,availability,PredMode         ;// Arg predMode & availability loaded from stack to reg
        M_LDRD   leftStep,dstStep,LeftStep              ;// Arg leftStep & dstStep loaded from stack to reg


        LDR      pc, [pTable, predMode, LSL #2]         ;// Branch to the case based on predMode


;//-----------------------------------------------------------------------------------------------
;// Horizontal: every row is filled with its own left neighbour.
;// Two interleaved pointers (even rows / odd rows) each step by 2*step.
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_HOR

        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep
        ;// Load Left Edge (each byte is replicated to all lanes)
        VLD1     {dLeftVal0[]},[pSrcLeft],srcStep       ;// pSrcLeft[0*leftStep]
        VLD1     {dLeftVal1[]},[pSrcTmp],srcStep        ;// pSrcLeft[1*leftStep]
        VLD1     {dLeftVal2[]},[pSrcLeft]               ;// pSrcLeft[2*leftStep]
        VLD1     {dLeftVal3[]},[pSrcTmp]                ;// pSrcLeft[3*leftStep]

        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep

        VST1     dLeftVal0U32[0],[pDst],dstep           ;// pDst[0*dstStep+x] :0<= x <= 3
        VST1     dLeftVal1U32[0],[pDstTmp],dstep        ;// pDst[1*dstStep+x] :0<= x <= 3
        VST1     dLeftVal2U32[0],[pDst]                 ;// pDst[2*dstStep+x] :0<= x <= 3
        VST1     dLeftVal3U32[0],[pDstTmp]              ;// pDst[3*dstStep+x] :0<= x <= 3

        B        ExitPredict4x4                         ;// Branch to exit code

;//-----------------------------------------------------------------------------------------------
;// Vertical: every row is a copy of the four above neighbours.
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_VERT

        ;// Load Upper Edge
        VLD1     dAboveU32[0],[pSrcAbove]
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep

        ;// Shared store tail: also entered from every DC variant with the row
        ;// pattern in the low 32 bits of D0 and pDstTmp/dstep already set up.
DCPredict4x4VertStore

        VST1     dAboveU32[0],[pDst],dstep
        VST1     dAboveU32[0],[pDstTmp],dstep
        VST1     dAboveU32[0],[pDst]
        VST1     dAboveU32[0],[pDstTmp]

        B        ExitPredict4x4                         ;// Branch to exit code

;//-----------------------------------------------------------------------------------------------
;// DC: fill the block with the rounded mean of whichever of the left/above
;// edges are flagged available, or with 0x80 if neither is.
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_DC


        TST      availability, #OMX_VC_LEFT
        BEQ      DCPredict4x4LeftNotAvailable

        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep
        ;// Load Left Edge into lanes 0..3
        VLD1     {dLeftVal[0]},[pSrcLeft],srcStep       ;// pSrcLeft[0*leftStep]
        VLD1     {dLeftVal[1]},[pSrcTmp],srcStep        ;// pSrcLeft[1*leftStep]
        VLD1     {dLeftVal[2]},[pSrcLeft]               ;// pSrcLeft[2*leftStep]
        VLD1     {dLeftVal[3]},[pSrcTmp]                ;// pSrcLeft[3*leftStep]

        TST      availability, #OMX_VC_UPPER
        BEQ      DCPredict4x4LeftOnlyAvailable

        ;// Load Upper Edge also (into lanes 4..7 of the same register)
        VLD1     dLeftValU32[1],[pSrcAbove]             ;// pSrcAbove[0 to 3]
        MOV      return, #OMX_Sts_NoErr                 ;// r0 (pSrcLeft) is no longer needed here

        VPADDL   dSumAboveLeftU16, dLeftVal             ;// [pSrcAbove[2+3 | 0+1] | pSrcLeft[2+3 | 0+1]]
        VPADDL   dSumAboveLeftU32, dSumAboveLeftU16     ;// [pSrcAbove[2+3+0+1] | pSrcLeft[2+3+0+1]]
        VPADDL   dSumAboveLeftU64, dSumAboveLeftU32     ;// [pSrcAbove[2+3+0+1] + pSrcLeft[2+3+0+1]]
        VRSHR    dSumAboveLeftU64,dSumAboveLeftU64,#3   ;// Sum = (Sum + 4) >> 3
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VDUP     dSum,dSumAboveLeftU8[0]

        B        DCPredict4x4VertStore

DCPredict4x4LeftOnlyAvailable

        MOV      return, #OMX_Sts_NoErr                 ;// returnNoError

        ;// Lanes 4..7 of dLeftVal are not loaded on this path; they only feed
        ;// the upper halves of the sums below, which are never read.
        VPADDL   dSumLeftValU16, dLeftVal               ;// [ XX | pSrcLeft[2+3 | 0+1]]
        VPADDL   dSumLeftValU32, dSumLeftValU16         ;// [ XXXX | pSrcLeft[2+3+0+1]]

        VRSHR    dSumLeftValU32,dSumLeftValU32,#2       ;// Sum = (Sum + 2) >> 2
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VDUP     dSum,dSumLeftValU8[0]

        B        DCPredict4x4VertStore

DCPredict4x4LeftNotAvailable

        TST      availability, #OMX_VC_UPPER
        BEQ      DCPredict4x4NoneAvailable

        ;// Load Upper Edge
        VLD1     dAboveU32[0],[pSrcAbove]               ;// pSrcAbove[0 to 3]
        MOV      return, #OMX_Sts_NoErr

        VPADDL   dSumAboveValU16, dAboveVal             ;// [ XX | pSrcAbove[2+3 | 0+1]]
        VPADDL   dSumAboveValU32, dSumAboveValU16       ;// [ XXXX | pSrcAbove[2+3+0+1]]

        VRSHR    dSumAboveValU32,dSumAboveValU32,#2     ;// Sum = (Sum + 2) >> 2
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VDUP     dSum,dSumAboveValU8[0]

        B        DCPredict4x4VertStore

DCPredict4x4NoneAvailable

        VMOV     dConst128U8,#0x80                      ;// 0x8080808080808080 if(count == 0)
        MOV      return, #OMX_Sts_NoErr

        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        B        DCPredict4x4VertStore



;//-----------------------------------------------------------------------------------------------
;// Diagonal down-left: 3-tap filter along the above row. When the upper-right
;// pixels are not flagged available, U3 is replicated in their place.
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_DIAG_DL

        TST      availability, #OMX_VC_UPPER_RIGHT
        BEQ      DiagDLUpperRightNotAvailable

        VLD1     dAbove0,[pSrcAbove]                    ;// [U7|U6|U5|U4|U3|U2|U1|U0]
        VDUP     dU7, dAbove0[7]                        ;// [U7|U7|U7|U7|U7|U7|U7|U7]
        VEXT     dAbove1, dAbove0, dU7, #1              ;// [U7|U7|U6|U5|U4|U3|U2|U1]
        VEXT     dAbove2, dAbove0, dU7, #2              ;// [U7|U7|U7|U6|U5|U4|U3|U2]
        B        DiagDLPredict4x4Store

DiagDLUpperRightNotAvailable
        VLD1     dAboveU32[1],[pSrcAbove]               ;// [U3|U2|U1|U0|-|-|-|-]
        VDUP     dU3, dAbove[7]                         ;// [U3 U3 U3 U3 U3 U3 U3 U3]

        VEXT     dAbove0, dAbove, dU3, #4               ;// [U3 U3 U3 U3 U3 U2 U1 U0]
        VEXT     dAbove1, dAbove, dU3, #5               ;// [U3 U3 U3 U3 U3 U3 U2 U1]
        VEXT     dAbove2, dAbove, dU3, #6               ;// [U3 U3 U3 U3 U3 U3 U3 U2]

DiagDLPredict4x4Store

        ;// Truncating half-add followed by rounding half-add yields the
        ;// 3-tap result (a+2*b+c+2)>>2 without widening to 16 bits.
        VHADD    dTmp, dAbove0, dAbove2
        VRHADD   dTmp, dTmp, dAbove1                    ;// (a+2*b+c+2)>>2


        ;// Each row is the previous row shifted along by one filtered sample
        VST1     dTmpU32[0],[pDst],dstStep
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst],dstStep
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst],dstStep
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst]

        B        ExitPredict4x4                         ;// Branch to exit code


;//-----------------------------------------------------------------------------------------------
;// Diagonal down-right: 3-tap filter along the edge L3..L0, UL, U0..U3.
;// pDst1/pDst2/pDst3 overwrite pSrcAbove/leftStep/predMode, so each is
;// computed only after the last use of the register it replaces.
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_DIAG_DR


        ;// Load U0,U1,U2,U3

        VLD1     dAboveU32[0],[pSrcAbove]               ;// [X|X|X|X|U3|U2|U1|U0]

        ;// Load UL,L0,L1,L2,L3                         ;// dLeft = [UL|L0|L1|L2|L3|X|X|X]
        VLD1     {dLeft[7]},[pSrcAboveLeft]
        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep
        ADD      pDst1,pDst,dstStep

        VLD1     {dLeft[6]},[pSrcLeft],srcStep          ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft[5]},[pSrcTmp],srcStep           ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft[4]},[pSrcLeft]                  ;// pSrcLeft[2*leftStep]
        VLD1     {dLeft[3]},[pSrcTmp]                   ;// pSrcLeft[3*leftStep]


        VEXT     dAbove0,dLeft,dAbove,#3                ;// [U2|U1|U0|UL|L0|L1|L2|L3]
        ADD      pDst2,pDst1,dstStep
        VEXT     dAbove1,dLeft,dAbove,#4                ;// [U3|U2|U1|U0|UL|L0|L1|L2]
        ADD      pDst3,pDst2,dstStep
        VEXT     dAbove2,dLeft,dAbove,#5                ;// [ X|U3|U2|U1|U0|UL|L0|L1]

        VHADD    dTmp, dAbove0, dAbove2
        VRHADD   dTmp, dTmp, dAbove1                    ;// (a+2*b+c+2)>>2


        ;// Rows are written bottom-up, shifting one sample per row
        VST1     dTmpU32[0],[pDst3]                     ;// Store pTmp[0],[1],[2],[3] @ pDst3
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst2]                     ;// Store pTmp[1],[2],[3],[4] @ pDst2
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst1]                     ;// Store pTmp[2],[3],[4],[5] @ pDst1
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst]                      ;// Store pTmp[3],[4],[5],[6] @ pDst

        B        ExitPredict4x4                         ;// Branch to exit code

;//-----------------------------------------------------------------------------------------------
;// Vertical-right
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_VR


        ;// Load UL,U0,U1,U2,U3
        VLD1     dAboveU32[0],[pSrcAbove]
        VLD1     dAbove[7],[pSrcAboveLeft]              ;// [UL|X|X|X|U3|U2|U1|U0]

        ;// Load L0,L1,L2                               ;// dLeft0 = [L0|L2|X|X|X|X|X|X]
                                                        ;// dLeft1 = [L1| X|X|X|X|X|X|X]
        VLD1     {dLeft0[7]},[pSrcLeft],leftStep        ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft1[7]},[pSrcLeft],leftStep        ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft0[6]},[pSrcLeft]                 ;// pSrcLeft[2*leftStep]


        VEXT     dOdd2,dAbove,dAbove,#7                 ;// [ x x x U3 U2 U1 U0 UL ]
        VEXT     dEven0,dLeft0,dOdd2,#6                 ;// [ x x x U1 U0 UL L0 L2 ]
        VEXT     dEven1,dLeft1,dOdd2,#7                 ;// [ x x x U2 U1 U0 UL L1 ]
        VEXT     dEven2,dLeft0,dAbove,#7                ;// [ x x x U3 U2 U1 U0 L0 ]
        VEXT     dOdd0,dLeft1,dAbove,#7                 ;// [ x x x U3 U2 U1 U0 L1 ]
        VEXT     dOdd1,dLeft0,dOdd2,#7                  ;// [ x x x U2 U1 U0 UL L0 ]

        VHADD    dTmp1, dOdd0, dOdd2
        VRHADD   dTmp1, dTmp1, dOdd1                    ;// Tmp[ x x x 9 7 5 3 1 ]

        VHADD    dTmp0, dEven0, dEven2
        VRHADD   dTmp0, dTmp0, dEven1                   ;// Tmp[ x x x 8 6 4 2 0 ]


        VEXT     dTmp3,dTmp1,dTmp1,#1                   ;// Tmp[ x x x x 9 7 5 3 ]
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VEXT     dTmp2,dTmp0,dTmp0,#1                   ;// Tmp[ x x x x 8 6 4 2 ]


        VST1     dTmp3U32[0],[pDst],dstep               ;// Tmp[9],[7],[5],[3]
        VST1     dTmp2U32[0],[pDstTmp],dstep            ;// Tmp[8],[6],[4],[2]
        VST1     dTmp1U32[0],[pDst],dstep               ;// Tmp[7],[5],[3],[1]
        VST1     dTmp0U32[0],[pDstTmp]                  ;// Tmp[6],[4],[2],[0]

        B        ExitPredict4x4                         ;// Branch to exit code

;//-----------------------------------------------------------------------------------------------
;// Horizontal-down
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_HD


        ;// Load U0,U1,U2,U3
        ;// Only lanes 0..2 of dAbove are consumed by the VEXTs below, so a
        ;// 32-bit lane load is sufficient and avoids reading pSrcAbove[4..7]
        ;// (upper-right pixels, which this mode never uses).
        VLD1     dAboveU32[0],[pSrcAbove]               ;// dAbove = [X|X|X|X|U3|U2|U1|U0]

        ;// Load UL,L0,L1,L2,L3                         ;// dLeft = [UL|L0|L1|L2|L3|X|X|X]
        VLD1     {dLeft[7]},[pSrcAboveLeft]
        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep

        VLD1     {dLeft[6]},[pSrcLeft],srcStep          ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft[5]},[pSrcTmp],srcStep           ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft[4]},[pSrcLeft]                  ;// pSrcLeft[2*leftStep]
        VLD1     {dLeft[3]},[pSrcTmp]                   ;// pSrcLeft[3*leftStep]

        VEXT     dAbove0,dLeft,dAbove,#3                ;// [ U2|U1|U0|UL|L0|L1|L2|L3 ]
        VEXT     dAbove1,dLeft,dAbove,#2                ;// [ U1|U0|UL|L0|L1|L2|L3|X ]
        VEXT     dAbove2,dLeft,dAbove,#1                ;// [ U0|UL|L0|L1|L2|L3|X|X ]

        VHADD    dTmp0, dAbove0, dAbove2
        VRHADD   dTmp0, dTmp0, dAbove1                  ;// Tmp[ 0 | 1 | 2 | 4 | 6 | 8 | X | X ]


        VRHADD   dTmp1, dAbove1, dAbove0                ;// (a+b+1)>>1
        VSHL     dTmp1U64,dTmp1U64,#24                  ;// Tmp[ 3|5| 7 |9 | X | X | X | X ]


        VSHL     dTmpU64,dTmp0U64,#16                   ;// Tmp[ 2|4|6|8| X | X | X | X ]
        VZIP     dTmp1,dTmp                             ;// dTmp = [ 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 ]
        VEXT     dTmp0,dTmp0,dTmp0,#6                   ;// Tmp[ X| X| X| X| X| X| 0 | 1 ]
        VEXT     dTmp1,dTmp,dTmp0,#2                    ;// Tmp[ 0 | 1 | 2 | 3 | 4 | 5 | 6 |7 ]

        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep

        VST1     dTmp1U32[1],[pDst],dstep               ;// Store pTmp[0|1|2|3]
        VST1     dTmpU32[1],[pDstTmp],dstep             ;// Store pTmp[2|3|4|5]
        VST1     dTmp1U32[0],[pDst]                     ;// Store pTmp[4|5|6|7]
        VST1     dTmpU32[0],[pDstTmp]                   ;// Store pTmp[6|7|8|9]

        B        ExitPredict4x4                         ;// Branch to exit code

;//-----------------------------------------------------------------------------------------------
;// Vertical-left: same edge handling as DIAG_DL (U3 replicated when the
;// upper-right pixels are not flagged available).
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_VL


        TST      availability, #OMX_VC_UPPER_RIGHT
        BEQ      DiagVLUpperRightNotAvailable

        VLD1     dAbove0,[pSrcAbove]                    ;// [U7|U6|U5|U4|U3|U2|U1|U0]
        VEXT     dAbove1,dAbove0,dAbove0,#1             ;// [ X|U7|U6|U5|U4|U3|U2|U1]
        VEXT     dAbove2,dAbove1,dAbove1,#1             ;// [ X| X|U7|U6|U5|U4|U3|U2]

        B        DiagVLPredict4x4Store

DiagVLUpperRightNotAvailable
        VLD1     dAboveU32[1],[pSrcAbove]               ;// [U3|U2|U1|U0|-|-|-|-]
        VDUP     dU3, dAbove[7]                         ;// [U3 U3 U3 U3 U3 U3 U3 U3]

        VEXT     dAbove0, dAbove, dU3, #4               ;// [U3 U3 U3 U3 U3 U2 U1 U0]
        VEXT     dAbove1, dAbove, dU3, #5               ;// [U3 U3 U3 U3 U3 U3 U2 U1]
        VEXT     dAbove2, dAbove, dU3, #6               ;// [U3 U3 U3 U3 U3 U3 U3 U2]

DiagVLPredict4x4Store

        VRHADD   dTmp0, dAbove1, dAbove0                ;// (a+b+1)>>1
                                                        ;// Tmp[ X| X| X| 8| 6| 4| 2| 0 ]

        VHADD    dTmp3, dAbove0, dAbove2
        VRHADD   dTmp3, dTmp3, dAbove1                  ;// (a+2*b+c+2)>>2
                                                        ;// Tmp[ X| X| X| 9| 7| 5| 3| 1 ]

        VEXT     dTmp1,dTmp0,dTmp0,#1                   ;// Tmp[ X| X| X| X| 8| 6| 4| 2 ]
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VEXT     dTmp2,dTmp3,dTmp1,#1                   ;// Tmp[ X| X| X| X| 9| 7| 5| 3 ]

        VST1     dTmp0U32[0],[pDst],dstep               ;// Tmp[6],[4],[2],[0]
        VST1     dTmp3U32[0],[pDstTmp],dstep            ;// Tmp[7],[5],[3],[1]
        VST1     dTmp1U32[0],[pDst]                     ;// Tmp[8],[6],[4],[2]
        VST1     dTmp2U32[0],[pDstTmp]                  ;// Tmp[9],[7],[5],[3]

        B        ExitPredict4x4                         ;// Branch to exit code

;//-----------------------------------------------------------------------------------------------
;// Horizontal-up: L3 is replicated past the end of the left edge.
;// This is the last case, so it falls through into ExitPredict4x4.
;//-----------------------------------------------------------------------------------------------
OMX_VC_4x4_HU
        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep

        ;// Load Left Edge                              ;// [L3|L2|L1|L0|X|X|X|X]
        VLD1     {dLeft[4]},[pSrcLeft],srcStep          ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft[5]},[pSrcTmp],srcStep           ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft[6]},[pSrcLeft]                  ;// pSrcLeft[2*leftStep]
        VLD1     {dLeft[7]},[pSrcTmp]                   ;// pSrcLeft[3*leftStep]

        VDUP     dL3,dLeft[7]                           ;// [L3|L3|L3|L3|L3|L3|L3|L3]

        VEXT     dLeftHU0,dLeft,dL3,#4                  ;// [L3|L3|L3|L3|L3|L2|L1|L0]
        VEXT     dLeftHU1,dLeft,dL3,#5                  ;// [L3|L3|L3|L3|L3|L3|L2|L1]
        VEXT     dLeftHU2,dLeft,dL3,#6                  ;// [L3|L3|L3|L3|L3|L3|L3|L2]

        VHADD    dTmp0, dLeftHU0, dLeftHU2
        VRHADD   dTmp0, dTmp0, dLeftHU1                 ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 5 | 3 | 1 ]

        VRHADD   dTmp1, dLeftHU1, dLeftHU0              ;// (a+b+1)>>1
                                                        ;//  Tmp[ L3 | L3 | L3 | L3 | L3 | 4 | 2 | 0 ]

        VZIP     dTmp1,dTmp0                            ;// dTmp1 = Tmp[7| 6| 5| 4| 3| 2| 1| 0]
                                                        ;// dTmp0 = [L3|L3|L3|L3|L3|L3|L3|L3]


        VST1     dTmp1U32[0],[pDst],dstStep             ;// [3|2|1|0]
        VEXT     dTmp1,dTmp1,dTmp1,#2
        VST1     dTmp1U32[0],[pDst],dstStep             ;// [5|4|3|2]
        VEXT     dTmp1,dTmp1,dTmp1,#2
        VST1     dTmp1U32[0],[pDst],dstStep             ;// [7|6|5|4]
        VST1     dTmp0U32[0],[pDst]                     ;// [9|8|7|6]


        ;// Common exit for every prediction mode
ExitPredict4x4

        MOV      return,  #OMX_Sts_NoErr
        M_END

        ENDIF ;// CortexA8

        END
;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 ends
;//-----------------------------------------------------------------------------------------------