;//
;//
;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

;//-------------------------------------------------------
;// Dispatch table that implements a C-style switch on
;// predMode by two levels of indexing: predMode selects
;// a word in this table, and that word is the address of
;// the code for the corresponding prediction mode.
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pIndexTable16x16
        DCD     OMX_VC_16X16_VERT, OMX_VC_16X16_HOR
        DCD     OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE

        IF ARM1136JS

;//--------------------------------------------
;// Constants
;//--------------------------------------------
BLK_SIZE        EQU 0x10
MUL_CONST0      EQU 0x01010101
MUL_CONST1      EQU 0x00060004
MUL_CONST2      EQU 0x00070005
MUL_CONST3      EQU 0x00030001
MASK_CONST      EQU 0x00FF00FF

;//--------------------------------------------
;// Scratch variable
;//
;// Several names alias the same physical register;
;// each alias is only live in the mode that uses it.
;//--------------------------------------------
y               RN 12
pc              RN 15

return          RN 0
innerCount      RN 0
outerCount      RN 1
pSrcLeft2       RN 1
pDst2           RN 2
sum             RN 6
pTable          RN 9
temp1           RN 10
temp2           RN 12
cMul1           RN 11
cMul2           RN 12
count           RN 12
dstStepx2       RN 11
leftStepx2      RN 14
r0x01010101     RN 10
r0x00FF00FF     RN 11

tVal0           RN 0
tVal1           RN 1
tVal2           RN 2
tVal3           RN 3
tVal4           RN 4
tVal5           RN 5
tVal6           RN 6
tVal7           RN 7
tVal8           RN 8
tVal9           RN 9
tVal10          RN 10
tVal11          RN 11
tVal12          RN 12
tVal14          RN 14

b               RN 12
c               RN 14

p2p0            RN 0
p3p1            RN 1
p6p4            RN 2
p7p5            RN 4
p10p8           RN 6
p11p9           RN 7
p14p12          RN 8
p15p13          RN 9

p3210           RN 10
p7654           RN 10
p111098         RN 10
p15141312       RN 10

;//--------------------------------------------
;// Declare input registers
;//--------------------------------------------
pSrcLeft        RN 0    ;// input pointer
pSrcAbove       RN 1    ;// input pointer
pSrcAboveLeft   RN 2    ;// input pointer
pDst            RN 3    ;// output pointer
leftStep        RN 4    ;// input variable
dstStep         RN 5    ;// input variable
predMode        RN 6    ;// input variable
availability    RN 7    ;// input variable

;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_16x16 starts
;//
;// Fills a 16x16 block at pDst with one of four intra predictions chosen by predMode.
;//
;// In:   r0 = pSrcLeft       left neighbour column, one byte every leftStep bytes
;//       r1 = pSrcAbove      16 bytes of the row above (read with LDM in VERT/DC)
;//       r2 = pSrcAboveLeft  above-left neighbour byte (read in PLANE mode only)
;//       r3 = pDst           destination, rows dstStep bytes apart
;//       stack: LeftStep, DstStep, PredMode, Availability (4 bytes each)
;// Out:  r0 = OMX_Sts_NoErr on every path
;//
;// predMode is used unchecked as an index into armVCM4P10_pIndexTable16x16, so it
;// must select one of the four table entries.
;// availability is only examined in DC mode (OMX_VC_UPPER / OMX_VC_LEFT bits).
;//
;// NOTE(review): VERT and DC modes use LDM/STM on pSrcAbove/pDst, and HOR/PLANE use
;// word STR on pDst - this presumably requires 4-byte aligned pointers and a dstStep
;// that is a multiple of 4; confirm against the API contract.
;// NOTE(review): register save/restore is delegated to M_START/M_EXIT/M_END from
;// armCOMM_s.h (the "r11" argument presumably names the highest register to
;// preserve) - confirm there; r12 and r14 are also used as scratch below.
;//-----------------------------------------------------------------------------------------------

        ;// Write function header
        M_START omxVCM4P10_PredictIntra_16x16, r11

        ;// Define stack arguments
        M_ARG   LeftStep,     4
        M_ARG   DstStep,      4
        M_ARG   PredMode,     4
        M_ARG   Availability, 4

        ;// M_STALL ARM1136JS=4

        LDR     pTable, =armVCM4P10_pIndexTable16x16    ;// Load index table for switch case

        ;// Load arguments from the stack
        M_LDR   predMode,     PredMode                  ;// predMode     <- stack arg
        M_LDR   leftStep,     LeftStep                  ;// leftStep     <- stack arg
        M_LDR   dstStep,      DstStep                   ;// dstStep      <- stack arg
        M_LDR   availability, Availability              ;// availability <- stack arg

        MOV     y, #BLK_SIZE                            ;// Row counter for VERT (DC reloads it)
        LDR     pc, [pTable, predMode, LSL #2]          ;// Branch to the case selected by predMode

;//-----------------------------------------------------------------------------------------------
;// Vertical: every destination row is a copy of pSrcAbove[0..15].
;// Two rows are written per iteration through pDst and pDst2.
;//-----------------------------------------------------------------------------------------------
OMX_VC_16X16_VERT
        LDM     pSrcAbove, {tVal6,tVal7,tVal8,tVal9}    ;// tVal6..9 = pSrcAbove[0 to 15]
        ADD     dstStepx2, dstStep, dstStep             ;// dstStepx2 = 2 * dstStep
        ADD     pDst2, pDst, dstStep                    ;// pDst2 = pDst + dstStep (odd rows)

        ;// M_STALL ARM1136JS=2                         ;// Stall outside the loop

LOOP_VERT
        STM     pDst, {tVal6,tVal7,tVal8,tVal9}         ;// even row [0 to 15] = tVal6..9
        SUBS    y, y, #2                                ;// y -= 2 (two rows per pass)
        ADD     pDst, pDst, dstStepx2                   ;// pDst advanced by 2 * dstStep
        STM     pDst2, {tVal6,tVal7,tVal8,tVal9}        ;// odd row [0 to 15] = tVal6..9
        ADD     pDst2, pDst2, dstStepx2                 ;// pDst2 advanced by 2 * dstStep
        BNE     LOOP_VERT                               ;// Loop for 8 times
        MOV     return, #OMX_Sts_NoErr
        M_EXIT


;//-----------------------------------------------------------------------------------------------
;// Horizontal: row r is filled with the left neighbour byte of row r.
;// Each iteration writes four rows; the left bytes for the first two rows of the
;// next iteration are preloaded at the bottom of the loop.
;//
;// NOTE(review): on the final iteration the two trailing preloads still execute, so
;// the bytes at pSrcLeft + 16*leftStep and pSrcLeft + 17*leftStep are read (and
;// discarded). Callers must tolerate that over-read, or the preloads should be made
;// conditional on the loop continuing.
;//-----------------------------------------------------------------------------------------------
OMX_VC_16X16_HOR

        ;// M_STALL ARM1136JS=6

        LDR     r0x01010101, =MUL_CONST0                ;// Const to repeat the byte in reg 4 times
        MOV     y, #4                                   ;// Outer Loop Count (4 rows per pass)
        M_LDRB  tVal6, [pSrcLeft], +leftStep            ;// tVal6 = left byte of row 0
        ADD     pDst2, pDst, dstStep                    ;// pDst2 = pDst + dstStep (odd rows)
        M_LDRB  tVal7, [pSrcLeft], +leftStep            ;// tVal7 = left byte of row 1
        ADD     dstStepx2, dstStep, dstStep             ;// double dstStep
        SUB     dstStepx2, dstStepx2, #12               ;// 2*dstStep - 12: undoes the three #4 post-increments

LOOP_HOR
        M_LDRB  tVal8, [pSrcLeft], +leftStep            ;// tVal8 = left byte of third row of this group
        MUL     tVal6, tVal6, r0x01010101               ;// replicate the val in all the bytes
        M_LDRB  tVal9, [pSrcLeft], +leftStep            ;// tVal9 = left byte of fourth row of this group
        MUL     tVal7, tVal7, r0x01010101               ;// replicate the val in all the bytes
        SUBS    y, y, #1                                ;// y-- (flags consumed by BNE below)
        STR     tVal6, [pDst],  #+4                     ;// store {tVal6} at pDst[0 to 3]
        STR     tVal7, [pDst2], #+4                     ;// store {tVal7} at pDst2[0 to 3]
        STR     tVal6, [pDst],  #+4                     ;// store {tVal6} at pDst[4 to 7]
        STR     tVal7, [pDst2], #+4                     ;// store {tVal7} at pDst2[4 to 7]
        MUL     tVal8, tVal8, r0x01010101               ;// replicate the val in all the bytes
        STR     tVal6, [pDst],  #+4                     ;// store {tVal6} at pDst[8 to 11]
        STR     tVal7, [pDst2], #+4                     ;// store {tVal7} at pDst2[8 to 11]
        MUL     tVal9, tVal9, r0x01010101               ;// replicate the val in all the bytes
        M_STR   tVal6, [pDst],  dstStepx2               ;// store {tVal6} at pDst[12 to 15], move down 2 rows
        M_STR   tVal7, [pDst2], dstStepx2               ;// store {tVal7} at pDst2[12 to 15], move down 2 rows
        STR     tVal8, [pDst],  #+4                     ;// store {tVal8} at pDst[0 to 3]
        STR     tVal9, [pDst2], #+4                     ;// store {tVal9} at pDst2[0 to 3]
        STR     tVal8, [pDst],  #+4                     ;// store {tVal8} at pDst[4 to 7]
        STR     tVal9, [pDst2], #+4                     ;// store {tVal9} at pDst2[4 to 7]
        STR     tVal8, [pDst],  #+4                     ;// store {tVal8} at pDst[8 to 11]
        STR     tVal9, [pDst2], #+4                     ;// store {tVal9} at pDst2[8 to 11]
        M_STR   tVal8, [pDst],  dstStepx2               ;// store {tVal8} at pDst[12 to 15], move down 2 rows
        M_LDRB  tVal6, [pSrcLeft], +leftStep            ;// preload left byte for first row of next group
        M_STR   tVal9, [pDst2], dstStepx2               ;// store {tVal9} at pDst2[12 to 15], move down 2 rows
        M_LDRB  tVal7, [pSrcLeft], +leftStep            ;// preload left byte for second row of next group
        BNE     LOOP_HOR                                ;// Loop for 4 times
        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;//-----------------------------------------------------------------------------------------------
;// DC: every pixel is the rounded mean of the available neighbours.
;//   upper and left available : (sumAbove + sumLeft + 16) >> 5
;//   exactly one available    : (that sum + 8) >> 4
;//   neither available        : 128
;// count (r12) holds the number of available sides; tVal2 keeps the upper sum
;// (low halfword only is meaningful) while sum (r6) is reused for the left sum.
;//-----------------------------------------------------------------------------------------------
OMX_VC_16X16_DC

        ;// M_STALL ARM1136JS=2

        MOV     count, #0                               ;// count = 0
        TST     availability, #OMX_VC_UPPER             ;// if(availability & #OMX_VC_UPPER)
        BEQ     TST_LEFT                                ;// Jump to Left if not upper
        LDM     pSrcAbove, {tVal8,tVal9,tVal10,tVal11}  ;// tVal8..11 = pSrcAbove[0 to 15]
        ADD     count, count, #1                        ;// if upper inc count by 1

        ;// M_STALL ARM1136JS=2

        ;// Sum the 16 bytes as pairs of 16-bit lanes; the two lanes are folded
        ;// together at the end.
        UXTB16  tVal2, tVal8                            ;// bytes 0 and 2 of word 0
        UXTB16  tVal6, tVal9                            ;// bytes 0 and 2 of word 1
        UADD16  tVal2, tVal2, tVal6                     ;// lane-wise sum of the above
        UXTB16  tVal8, tVal8, ROR #8                    ;// bytes 1 and 3 of word 0
        UXTB16  tVal9, tVal9, ROR #8                    ;// bytes 1 and 3 of word 1
        UADD16  tVal8, tVal8, tVal9                     ;// lane-wise sum of the above
        UADD16  tVal2, tVal2, tVal8                     ;// lanes now cover pSrcAbove[0] to pSrcAbove[7]

        UXTB16  tVal8, tVal10                           ;// bytes 0 and 2 of word 2
        UXTB16  tVal9, tVal11                           ;// bytes 0 and 2 of word 3
        UADD16  tVal8, tVal8, tVal9                     ;// lane-wise sum of the above
        UXTB16  tVal10, tVal10, ROR #8                  ;// bytes 1 and 3 of word 2
        UXTB16  tVal11, tVal11, ROR #8                  ;// bytes 1 and 3 of word 3
        UADD16  tVal10, tVal10, tVal11                  ;// lane-wise sum of the above
        UADD16  tVal8, tVal8, tVal10                    ;// lanes now cover pSrcAbove[8] to pSrcAbove[15]

        UADD16  tVal2, tVal2, tVal8                     ;// lanes now cover pSrcAbove[0] to pSrcAbove[15]

        ;// M_STALL ARM1136JS=1

        ADD     tVal2, tVal2, tVal2, LSR #16            ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[15])

        ;// M_STALL ARM1136JS=1

        UXTH    sum, tVal2                              ;// Extract the lower half for result

TST_LEFT
        TST     availability, #OMX_VC_LEFT
        BEQ     TST_COUNT
        ADD     leftStepx2, leftStep, leftStep          ;// leftStepx2 = 2 * leftStep
        ADD     pSrcLeft2, pSrcLeft, leftStep           ;// pSrcLeft2 = pSrcLeft + leftStep (odd rows)

        M_LDRB  tVal8,  [pSrcLeft],  +leftStepx2        ;// tVal8  = left byte of row 0
        M_LDRB  tVal9,  [pSrcLeft2], +leftStepx2        ;// tVal9  = left byte of row 1
        M_LDRB  tVal10, [pSrcLeft],  +leftStepx2        ;// tVal10 = left byte of row 2
        M_LDRB  tVal11, [pSrcLeft2], +leftStepx2        ;// tVal11 = left byte of row 3
        ADD     tVal7, tVal8, tVal9                     ;// tVal7 = row0 + row1
        ADD     count, count, #1                        ;// Inc Counter if Left is available
        ADD     tVal6, tVal10, tVal11                   ;// tVal6 = row2 + row3

        M_LDRB  tVal8,  [pSrcLeft],  +leftStepx2        ;// tVal8  = left byte of row 4
        M_LDRB  tVal9,  [pSrcLeft2], +leftStepx2        ;// tVal9  = left byte of row 5
        M_LDRB  tVal10, [pSrcLeft],  +leftStepx2        ;// tVal10 = left byte of row 6
        M_LDRB  tVal11, [pSrcLeft2], +leftStepx2        ;// tVal11 = left byte of row 7
        ADD     sum, tVal7, tVal6                       ;// sum = rows 0..3 (sum and tVal6 are both r6)
        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = row4 + row5
        ADD     tVal10, tVal10, tVal11                  ;// tVal10 = row6 + row7
        ADD     tVal7, tVal8, tVal10                    ;// tVal7 = rows 4..7


        M_LDRB  tVal8,  [pSrcLeft],  +leftStepx2        ;// tVal8  = left byte of row 8
        M_LDRB  tVal9,  [pSrcLeft2], +leftStepx2        ;// tVal9  = left byte of row 9
        M_LDRB  tVal10, [pSrcLeft],  +leftStepx2        ;// tVal10 = left byte of row 10
        M_LDRB  tVal11, [pSrcLeft2], +leftStepx2        ;// tVal11 = left byte of row 11
        ADD     sum, sum, tVal7                         ;// sum = rows 0..7
        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = row8 + row9
        ADD     tVal10, tVal10, tVal11                  ;// tVal10 = row10 + row11
        ADD     tVal7, tVal8, tVal10                    ;// tVal7 = rows 8..11


        M_LDRB  tVal8,  [pSrcLeft],  +leftStepx2        ;// tVal8  = left byte of row 12
        M_LDRB  tVal9,  [pSrcLeft2], +leftStepx2        ;// tVal9  = left byte of row 13
        M_LDRB  tVal10, [pSrcLeft],  +leftStepx2        ;// tVal10 = left byte of row 14
        M_LDRB  tVal11, [pSrcLeft2], +leftStepx2        ;// tVal11 = left byte of row 15
        ADD     sum, sum, tVal7                         ;// sum = rows 0..11
        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = row12 + row13
        ADD     tVal10, tVal10, tVal11                  ;// tVal10 = row14 + row15
        ADD     tVal7, tVal8, tVal10                    ;// tVal7 = rows 12..15
        ADD     sum, sum, tVal7                         ;// sum = rows 0..15

TST_COUNT
        CMP     count, #0                               ;// if(count == 0)
        MOVEQ   sum, #128                               ;// sum = 128 if(count == 0)
        BEQ     TST_COUNT0                              ;// if(count == 0)
        CMP     count, #1                               ;// if(count == 1); flags stay live until LSRNE
        ADDEQ   sum, sum, #8                            ;// sum += 8 if(count == 1)
        ADDNE   sum, sum, tVal2                         ;// sum = sumleft + sumupper
        ADDNE   sum, sum, #16                           ;// sum += 16 if(count == 2)

        ;// M_STALL ARM1136JS=1

        UXTH    sum, sum                                ;// keep low halfword (drops junk from tVal2's top half)

        ;// M_STALL ARM1136JS=1

        LSREQ   sum, sum, #4                            ;// sum >> 4 if(count == 1)

        ;// M_STALL ARM1136JS=1

        LSRNE   sum, sum, #5                            ;// sum >> 5 if(count == 2)

TST_COUNT0

        ;// M_STALL ARM1136JS=1

        ORR     sum, sum, sum, LSL #8                   ;// sum replicated in two bytes

        ;// M_STALL ARM1136JS=1

        ORR     tVal6, sum, sum, LSL #16                ;// sum replicated in all bytes
        CPY     tVal7, tVal6                            ;// tVal7 = tVal6
        CPY     tVal8, tVal6                            ;// tVal8 = tVal6
        CPY     tVal9, tVal6                            ;// tVal9 = tVal6
        ADD     dstStepx2, dstStep, dstStep             ;// double dstStep
        ADD     pDst2, pDst, dstStep                    ;// pDst2 = pDst + dstStep (odd rows)
        MOV     y, #BLK_SIZE                            ;// Outer Loop Count (y was reused as count)

LOOP_DC
        STM     pDst, {tVal6,tVal7,tVal8,tVal9}         ;// even row [0 to 15] = tVal6..9
        SUBS    y, y, #2                                ;// y -= 2 (two rows per pass)
        ADD     pDst, pDst, dstStepx2                   ;// pDst advanced by 2 * dstStep
        STM     pDst2, {tVal6,tVal7,tVal8,tVal9}        ;// odd row [0 to 15] = tVal6..9
        ADD     pDst2, pDst2, dstStepx2                 ;// pDst2 advanced by 2 * dstStep
        BNE     LOOP_DC                                 ;// Loop for 8 times

        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;//-----------------------------------------------------------------------------------------------
;// Plane: pred[y][x] = clip8((a + 16 + b*(x-7) + c*(y-7)) >> 5) with
;//   a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
;//   H = sum_{k=1..8} k * (above[7+k] - above[7-k])   (above[-1] = pSrcAboveLeft[0])
;//   V = sum_{k=1..8} k * (left[7+k]  - left[7-k])    (left[-1]  = pSrcAboveLeft[0])
;//   b = (5*H + 32) >> 6,  c = (5*V + 32) >> 6
;// The 16 values of one row are held as eight registers of two signed 16-bit lanes
;// ({p2,p0}, {p3,p1}, {p6,p4}, ...). Each row is saturated, shifted and stored, then
;// {c,c} is added to every lane to step to the next row.
;//-----------------------------------------------------------------------------------------------
OMX_VC_16X16_PLANE

        ;// M_STALL ARM1136JS=3
        RSB     tVal14, leftStep, leftStep, LSL #4      ;// tVal14 = 15*leftStep

        ;// M_STALL ARM1136JS=2
        LDRB    tVal10, [pSrcLeft, tVal14]              ;// tVal10 = pSrcLeft[15*leftStep]
        LDRB    tVal11, [pSrcAboveLeft]                 ;// tVal11 = pSrcAboveLeft[0]
        LDRB    tVal12, [pSrcAbove, #15]                ;// tVal12 = pSrcAbove[15]

        ADD     tVal2, tVal12, tVal10                   ;// tVal2  = pSrcAbove[15] + pSrcLeft[15*leftStep]
        SUB     tVal10, tVal10, tVal11                  ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
        SUB     tVal11, tVal12, tVal11                  ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
        MOV     tVal2, tVal2, LSL #4                    ;// tVal2  = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])

        ;// Accumulate H in tVal11
        MOV     tVal11, tVal11, LSL #3                  ;// 8*([15]-[-1])
        LDRB    tVal6, [pSrcAbove, #0]
        LDRB    tVal7, [pSrcAbove, #14]
        SUB     tVal8, tVal7, tVal6
        RSB     tVal8, tVal8, tVal8, LSL #3             ;// 7*([14]-[0])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #1]
        LDRB    tVal7, [pSrcAbove, #13]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal8, tVal8, tVal8
        ADD     tVal8, tVal8, tVal8, LSL #1             ;// 6*([13]-[1])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #2]
        LDRB    tVal7, [pSrcAbove, #12]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal8, tVal8, tVal8, LSL #2             ;// 5*([12]-[2])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #3]
        LDRB    tVal7, [pSrcAbove, #11]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal11, tVal11, tVal8, LSL #2           ;// + 4*([11]-[3])
        LDRB    tVal6, [pSrcAbove, #4]
        LDRB    tVal7, [pSrcAbove, #10]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal8, tVal8, tVal8, LSL #1             ;// 3*([10]-[4])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #5]
        LDRB    tVal7, [pSrcAbove, #9]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal11, tVal11, tVal8, LSL #1           ;// + 2*([9]-[5])
        LDRB    tVal6, [pSrcAbove, #6]
        LDRB    tVal7, [pSrcAbove, #8]
        SUB     tVal8, tVal7, tVal6                     ;// 1*([8]-[6])
        ADD     tVal7, tVal11, tVal8                    ;// tVal7 = H

        ADD     tVal2, tVal2, #16                       ;// tVal2 = a + 16
        MOV     tVal1, pSrcLeft                         ;// tVal1 = pSrcLeft (walks down from row 0)
        SUB     tVal9, tVal14, leftStep                 ;// tVal9 = 14*leftStep
        ADD     tVal9, pSrcLeft, tVal9                  ;// tVal9 = pSrcLeft + 14*leftStep (walks up)

        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[14*leftStep]
        M_LDRB  tVal11, [tVal1], +leftStep              ;// tVal11 = pSrcLeft[0]
        ADD     tVal7, tVal7, tVal7, LSL #2             ;// tVal7  = 5 * H
        ADD     tVal7, tVal7, #32                       ;// tVal7  = 5 * H + 32
        SUB     tVal8, tVal8, tVal11                    ;// tVal8  = pSrcLeft[14*leftStep] - pSrcLeft[0]
        ASR     tVal12, tVal7, #6                       ;// tVal12 = b = (5 * H + 32) >> 6

        RSB     tVal8, tVal8, tVal8, LSL #3             ;// tVal8  = V1 = 7 * (pSrcLeft[14*leftStep] - pSrcLeft[0])
        ADD     tVal6, tVal8, tVal10, LSL #3            ;// tVal6  = V = 8*V0 + V1
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[13*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[leftStep]
        RSB     tVal7, tVal12, tVal12, LSL #3           ;// tVal7  = 7*b
        SUB     tVal2, tVal2, tVal7                     ;// tVal2  = a + 16 - 7*b
        SUB     tVal7, tVal8, tVal10                    ;// tVal7  = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[12*leftStep]
        ADD     tVal7, tVal7, tVal7                     ;// tVal7  = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[2*leftStep]
        ADD     tVal7, tVal7, tVal7, LSL #1             ;// tVal7  = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
        ADD     tVal6, tVal6, tVal7                     ;// tVal6  = V = V + V2
        SUB     tVal7, tVal8, tVal10                    ;// tVal7  = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[11*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[3*leftStep]
        ADD     tVal7, tVal7, tVal7, LSL #2             ;// tVal7  = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
        ADD     tVal6, tVal6, tVal7                     ;// tVal6  = V = V + V3
        SUB     tVal7, tVal8, tVal10                    ;// tVal7  = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[10*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[4*leftStep]
        ADD     tVal6, tVal6, tVal7, LSL #2             ;// tVal6  = V = V + V4
        SUB     dstStep, dstStep, #16                   ;// dstStep - 16: row stride left after four #4 post-increments
        SUB     tVal7, tVal8, tVal10                    ;// tVal7  = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[9*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[5*leftStep]
        ADD     tVal7, tVal7, tVal7, LSL #1             ;// tVal7  = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
        ADD     tVal6, tVal6, tVal7                     ;// tVal6  = V = V + V5
        SUB     tVal7, tVal8, tVal10                    ;// tVal7  = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[8*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[6*leftStep]
        ADD     tVal6, tVal6, tVal7, LSL #1             ;// tVal6  = V = V + V6

        ;// M_STALL ARM1136JS=1
        SUB     tVal7, tVal8, tVal10                    ;// tVal7  = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
        ADD     tVal6, tVal6, tVal7                     ;// tVal6  = V = V + V7

        ;// M_STALL ARM1136JS=1
        ADD     tVal6, tVal6, tVal6, LSL #2             ;// tVal6  = 5*V
        ADD     tVal6, tVal6, #32                       ;// tVal6  = 5*V + 32

        ;// M_STALL ARM1136JS=1
        ASR     tVal14, tVal6, #6                       ;// tVal14 = c = (5*V + 32)>>6

        ;// M_STALL ARM1136JS=1
        RSB     tVal6, tVal14, tVal14, LSL #3           ;// tVal6  = 7*c
        UXTH    tVal14, tVal14                          ;// tVal14 = c with upper halfword cleared
        ADD     tVal10, tVal12, tVal12                  ;// tVal10 = 2*b
        ORR     tVal14, tVal14, tVal14, LSL #16         ;// tVal14 = {c , c}
        SUB     tVal6, tVal2, tVal6                     ;// tVal6  = d = a - 7*b - 7*c + 16 (signed, may be negative)
        ADD     tVal1, tVal6, tVal10                    ;// tVal1  = p2 = d + 2*b
        ADD     tVal10, tVal10, tVal12                  ;// tVal10 = 3*b
        ;// Pack with PKHBT rather than ORR: d is an unmasked 32-bit signed value, so
        ;// when d < 0 its sign-extension bits would OR into the upper halfword and
        ;// turn p2 (and every column derived from it) into -1. PKHBT takes only
        ;// bits [15:0] of d and bits [31:16] of (p2 << 16).
        PKHBT   tVal0, tVal6, tVal1, LSL #16            ;// tVal0  = p2p0 = pack {p2, p0}
        UXTH    tVal12, tVal12                          ;// tVal12 = b with upper halfword cleared
        UXTH    tVal10, tVal10                          ;// tVal10 = 3b with upper halfword cleared
        ORR     tVal12, tVal12, tVal12, LSL #16         ;// tVal12 = {b , b}
        ORR     tVal10, tVal10, tVal10, LSL #16         ;// tVal10 = {3b , 3b}
        SADD16  tVal1, tVal0, tVal12                    ;// tVal1  = p3p1   = p2p0   + {b,b}
        SADD16  tVal2, tVal1, tVal10                    ;// tVal2  = p6p4   = p3p1   + {3b,3b}
        SADD16  tVal4, tVal2, tVal12                    ;// tVal4  = p7p5   = p6p4   + {b,b}
        SADD16  tVal6, tVal4, tVal10                    ;// tVal6  = p10p8  = p7p5   + {3b,3b}
        SADD16  tVal7, tVal6, tVal12                    ;// tVal7  = p11p9  = p10p8  + {b,b}
        SADD16  tVal8, tVal7, tVal10                    ;// tVal8  = p14p12 = p11p9  + {3b,3b}
        SADD16  tVal9, tVal8, tVal12                    ;// tVal9  = p15p13 = p14p12 + {b,b}
        LDR     r0x00FF00FF, =MASK_CONST                ;// r0x00FF00FF = 0x00FF00FF (top nibble doubles as row counter)

        ;// Per row, for each register pair: saturate both lanes to [0, 2^13 - 1],
        ;// shift right by 5 and mask to get two bytes per register, then interleave
        ;// the even-column and odd-column registers into one output word.
        ;// temp1/temp2 reuse r10/r12, whose {3b,3b}/{b,b} values are no longer needed.
LOOP_PLANE

        USAT16  temp2, #13, p3p1
        USAT16  temp1, #13, p2p0
        SADD16  p3p1, p3p1, c                           ;// step columns 1,3 to next row
        SADD16  p2p0, p2p0, c                           ;// step columns 0,2 to next row
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8             ;// bytes for columns 0..3
        STR     temp1, [pDst], #4

        USAT16  temp2, #13, p7p5
        USAT16  temp1, #13, p6p4
        SADD16  p7p5, p7p5, c                           ;// step columns 5,7 to next row
        SADD16  p6p4, p6p4, c                           ;// step columns 4,6 to next row
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8             ;// bytes for columns 4..7
        STR     temp1, [pDst], #4

        USAT16  temp2, #13, p11p9
        USAT16  temp1, #13, p10p8
        SADD16  p11p9, p11p9, c                         ;// step columns 9,11 to next row
        SADD16  p10p8, p10p8, c                         ;// step columns 8,10 to next row
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8             ;// bytes for columns 8..11
        STR     temp1, [pDst], #4

        USAT16  temp2, #13, p15p13
        USAT16  temp1, #13, p14p12
        SADD16  p15p13, p15p13, c                       ;// step columns 13,15 to next row
        SADD16  p14p12, p14p12, c                       ;// step columns 12,14 to next row
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8             ;// bytes for columns 12..15
        STR     temp1, [pDst], #4

        ADDS    r0x00FF00FF, r0x00FF00FF, #1<<28        ;// Loop counter in top 4 bits; carry out after 16 rows

        ADD     pDst, pDst, dstStep                     ;// next row (dstStep already reduced by 16)

        BCC     LOOP_PLANE                              ;// Loop for 16 times
        MOV     return, #OMX_Sts_NoErr
        M_END

        ENDIF ;// ARM1136JS


        END
;-----------------------------------------------------------------------------------------------
; omxVCM4P10_PredictIntra_16x16 ends
;-----------------------------------------------------------------------------------------------