;//
;//
;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Define the processor variants supported by this file

        M_VARIANTS ARM1136JS

;//-------------------------------------------------------
;// This table implements a C-style switch/case in asm by
;// the method of two levels of indexing: predMode indexes
;// the table, and the loaded word is the address of the
;// case label that is written straight into pc.
;// The table holds exactly NUM_PRED_MODES (9) entries.
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pSwitchTable4x4
        DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR
        DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL
        DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR
        DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL
        DCD  OMX_VC_4x4_HU

        IF ARM1136JS

;//--------------------------------------------
;// Constants
;//--------------------------------------------
BLK_SIZE              EQU 0x8
MUL_CONST0            EQU 0x01010101            ;// byte * MUL_CONST0 replicates the byte into all 4 lanes
ADD_CONST1            EQU 0x80808080            ;// +128 per byte lane (final step of the f = e + 128 trick)
NUM_PRED_MODES        EQU 9                     ;// number of entries in armVCM4P10_pSwitchTable4x4

;//--------------------------------------------
;// Scratch variable
;//
;// NOTE: many of the names below alias the same
;// physical register (e.g. r0 is return, pSrcLeft,
;// tVal0 and r0x80808080; r6..r9 are Out0..Out3,
;// Left0..Left3, tVal6..tVal9 and also predMode,
;// availability and pTable). Each case only uses
;// an alias once the previous owner of that
;// register is dead.
;//--------------------------------------------
return                RN 0
pTable                RN 9
pc                    RN 15
r0x01010101           RN 1
r0x80808080           RN 0

tVal0                 RN 0
tVal1                 RN 1
tVal2                 RN 2
tVal4                 RN 4
tVal6                 RN 6
tVal7                 RN 7
tVal8                 RN 8
tVal9                 RN 9
tVal10                RN 10
tVal11                RN 11
tVal12                RN 12
tVal14                RN 14

Out0                  RN 6
Out1                  RN 7
Out2                  RN 8
Out3                  RN 9

Left0                 RN 6
Left1                 RN 7
Left2                 RN 8
Left3                 RN 9

Above0123             RN 12
Above4567             RN 14

AboveLeft             RN 10

;//--------------------------------------------
;// Declare input registers
;//--------------------------------------------
pSrcLeft              RN 0    ;// input  pointer
pSrcAbove             RN 1    ;// input  pointer
pSrcAboveLeft         RN 2    ;// input  pointer
pDst                  RN 3    ;// output pointer
leftStep              RN 4    ;// input  variable
dstStep               RN 5    ;// input  variable
predMode              RN 6    ;// input  variable
availability          RN 7    ;// input  variable

;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 starts
;//
;// Writes a 4x4 block of predicted bytes (four 32-bit row stores, rows dstStep bytes apart)
;// to pDst, selected by predMode.
;//
;// In (registers):  r0 = pSrcLeft       left neighbours, one byte every leftStep bytes
;//                  r1 = pSrcAbove      above neighbours, read as one or two words
;//                  r2 = pSrcAboveLeft  above-left neighbour, one byte
;//                  r3 = pDst           destination, written with word stores
;// In (stack):      leftStep, dstStep, predMode, availability (4 bytes each, in that order)
;// Out:             r0 = OMX_Sts_NoErr, or OMX_Sts_BadArgErr when predMode is not in
;//                       0..NUM_PRED_MODES-1 (nothing is written to pDst in that case)
;// predMode index:  0 VERT, 1 HOR, 2 DC, 3 DIAG_DL, 4 DIAG_DR, 5 VR, 6 HD, 7 VL, 8 HU
;//                  (order of armVCM4P10_pSwitchTable4x4)
;// availability:    only tested by DC (OMX_VC_UPPER / OMX_VC_LEFT) and by DIAG_DL / VL
;//                  (OMX_VC_UPPER_RIGHT); the other modes read their neighbours unconditionally.
;// NOTE(review):    pSrcAbove and pDst are accessed with LDR/LDMIA/STR, so they are presumably
;//                  required to be word aligned - confirm against the callers.
;//-----------------------------------------------------------------------------------------------

        ;// Write function header
        M_START omxVCM4P10_PredictIntra_4x4, r11

        ;// Define stack arguments
        M_ARG    LeftStep,     4
        M_ARG    DstStep,      4
        M_ARG    PredMode,     4
        M_ARG    Availability, 4

        ;// M_STALL ARM1136JS=4

        LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// Load index table for switch case

        ;// Load argument from the stack
        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg

        ;// Guard the table dispatch: an index outside the table would load pc from
        ;// whatever follows the table. The compare is unsigned so that negative
        ;// values are rejected as well.
        CMP      predMode, #NUM_PRED_MODES
        BHS      BadPredMode

        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on predMode

BadPredMode

        ;// OMX_Sts_BadArgErr is expected to be provided by omxtypes_s.h next to
        ;// OMX_Sts_NoErr (NOTE(review): confirm). LDR= lets the assembler pick
        ;// MOV/MVN or a literal, whatever the value is.
        LDR      return, =OMX_Sts_BadArgErr
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_VERT

        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
        M_STR    Above0123,  [pDst],  dstStep        ;// pDst[0  to 3]  = Above0123
        M_STR    Above0123,  [pDst],  dstStep        ;// pDst[4  to 7]  = Above0123
        M_STR    Above0123,  [pDst],  dstStep        ;// pDst[8  to 11] = Above0123
        STR      Above0123,  [pDst]                  ;// pDst[12 to 15] = Above0123
        MOV      return, #OMX_Sts_NoErr
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_HOR

        ;// M_STALL ARM1136JS=6

        LDR      r0x01010101,  =MUL_CONST0           ;// Const to repeat the byte in reg 4 times
        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
        MUL      Out0,   Left0,   r0x01010101        ;// replicate the val in all the bytes
        MUL      Out1,   Left1,   r0x01010101        ;// replicate the val in all the bytes
        MUL      Out2,   Left2,   r0x01010101        ;// replicate the val in all the bytes
        MUL      Out3,   Left3,   r0x01010101        ;// replicate the val in all the bytes
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
        M_STR    Out1,   [pDst],  dstStep            ;// store {Out1} at pDst [4  to 7 ]
        M_STR    Out2,   [pDst],  dstStep            ;// store {Out2} at pDst [8  to 11]
        STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]
        MOV      return, #OMX_Sts_NoErr
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_DC

        ;// M_STALL ARM1136JS=6

        ;// Only the UPPER and LEFT bits matter for DC; after the AND, availability
        ;// is exactly one of: both, UPPER only, LEFT only, or 0.
        AND      availability, availability, #(OMX_VC_UPPER + OMX_VC_LEFT)
        CMP      availability, #(OMX_VC_UPPER + OMX_VC_LEFT)
        BNE      UpperOrLeftOrNoneAvailable          ;// Jump to Upper if not both
        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]

        ;// M_STALL ARM1136JS=1

        UXTB16   tVal7, Above0123                    ;// pSrcAbove[0, 2]
        UXTB16   tVal6, Above0123, ROR #8            ;// pSrcAbove[1, 3]
        UADD16   tVal11, tVal6, tVal7                ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
        ADD      tVal11, tVal11, LSR #16             ;// sum(pSrcAbove[0] to pSrcAbove[3])
        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
        UXTH     tVal11, tVal11                      ;// upsum1 (Clear the top junk bits)
        ADD      tVal6, Left0, Left1                 ;// tVal6 = Left0 + Left1
        ADD      tVal7, Left2, Left3                 ;// tVal7 = Left2 + Left3
        ADD      tVal6, tVal6, tVal7                 ;// tVal6 = tVal6 + tVal7
        ADD      Out0, tVal6, tVal11                 ;// Out0  = tVal6 + tVal11
        ADD      Out0, Out0, #4                      ;// Out0  = Out0 + 4
        LDR      r0x01010101, =MUL_CONST0            ;// 0x01010101
        MOV      Out0, Out0, LSR #3                  ;// Out0 = (Out0 + 4)>>3

        ;// M_STALL ARM1136JS=1

        MUL      Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV      return,  #OMX_Sts_NoErr             ;// r0 (pSrcLeft) is dead here, safe to set the result
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
        M_EXIT                                       ;// Macro to exit midway-break from case

UpperOrLeftOrNoneAvailable
        ;// M_STALL ARM1136JS=3

        CMP      availability, #OMX_VC_UPPER         ;// if(availability == OMX_VC_UPPER), i.e. upper only
        BNE      LeftOrNoneAvailable                 ;// Jump to Left if not upper
        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]

        ;// M_STALL ARM1136JS=3

        UXTB16   tVal7, Above0123                    ;// pSrcAbove[0, 2]
        UXTB16   tVal6, Above0123, ROR #8            ;// pSrcAbove[1, 3]
        UADD16   Out0,  tVal6, tVal7                 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
        LDR      r0x01010101, =MUL_CONST0            ;// 0x01010101
        ADD      Out0, Out0, LSR #16                 ;// sum(pSrcAbove[0] to pSrcAbove[3])

        ;// M_STALL ARM1136JS=1

        UXTH     Out0, Out0                          ;// upsum1 (Clear the top junk bits)
        ADD      Out0, Out0, #2                      ;// Out0  = Out0 + 2

        ;// M_STALL ARM1136JS=1

        MOV      Out0, Out0, LSR #2                  ;// Out0 = (Out0 + 2)>>2

        ;// M_STALL ARM1136JS=1

        MUL      Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV      return,  #OMX_Sts_NoErr
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]

        M_EXIT                                       ;// Macro to exit midway-break from case

LeftOrNoneAvailable
        ;// M_STALL ARM1136JS=3

        LDR      r0x01010101, =MUL_CONST0            ;// 0x01010101 (also needed by NoneAvailable)
        CMP      availability, #OMX_VC_LEFT
        BNE      NoneAvailable
        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
        ADD      Out0, Left0, Left1                  ;// Out0  = Left0 + Left1

        ;// M_STALL ARM1136JS=1

        ADD      Out1, Left2, Left3                  ;// Out1  = Left2 + Left3
        ADD      Out0, Out0, Out1                    ;// Out0  = Out0  + Out1
        ADD      Out0, Out0, #2                      ;// Out0  = Out0 + 2

        ;// M_STALL ARM1136JS=1

        MOV      Out0, Out0, LSR #2                  ;// Out0 = (Out0 + 2)>>2

        ;// M_STALL ARM1136JS=1

        MUL      Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV      return, #OMX_Sts_NoErr
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
        M_EXIT                                       ;// Macro to exit midway-break from case

NoneAvailable
        MOV      Out0, #128                          ;// Out0 = 128 if(count == 0)

        ;// M_STALL ARM1136JS=5

        MUL      Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV      return, #OMX_Sts_NoErr
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_DIAG_DL

        ;//------------------------------------------------------------------
        ;// f = (a+2*b+c+2)>>2
        ;// Calculate as:
        ;// d = (a + c )>>1
        ;// e = (d - b')>>1
        ;// f = e + 128
        ;// (b' is the bitwise complement of b; all steps are done on four
        ;//  byte lanes at once with UHADD8 / UHSUB8 / UADD8)
        ;//------------------------------------------------------------------

        ;// M_STALL ARM1136JS=3

        TST      availability, #OMX_VC_UPPER_RIGHT
        LDMIA    pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (LDMIA/LDR leave the TST flags intact)
        BNE      DLUpperRightAvailable
        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
        MOV      tVal7, Above0123, LSR #24           ;// {00,  00,  00,  U3 }
        MOV      tVal11, tVal7, LSL #24              ;// {U3,  00,  00,  00 }
        MUL      Out3, tVal7, r0x01010101            ;// {U3,  U3,  U3,  U3 }
        MOV      tVal8, Above0123, LSR #16           ;// {00,  00,  U3,  U2 }
        MOV      tVal10, Above0123, LSR #8           ;// {00,  U3,  U2,  U1 }
        MVN      tVal10, tVal10                      ;// {00', U3', U2', U1'}
        UHADD8   tVal8, tVal8, Above0123             ;// {xx,  xx,  d1,  d0 }
        UHADD8   tVal6, Above0123, tVal9             ;// {xx,  d2,  xx,  xx } (tVal9 is Out3 = U3 in every lane)
        UHSUB8   tVal8, tVal8, tVal10                ;// {xx,  xx,  e1,  e0 }
        UHSUB8   tVal6, tVal6, tVal10                ;// {xx,  e2,  xx,  xx }
        UADD8    tVal8, tVal8, r0x80808080           ;// {xx,  xx,  f1,  f0 }
        UADD8    tVal6, tVal6, r0x80808080           ;// {xx,  f2,  xx,  xx }

        ;// M_STALL ARM1136JS=1

        PKHBT    tVal6, tVal8, tVal6                 ;// {xx,  f2,  f1,  f0 }
        BIC      tVal6, tVal6, #0xFF000000           ;// {00,  f2,  f1,  f0 }
        ORR      Out0, tVal6, tVal11                 ;// {U3,  f2,  f1,  f0 }

        ;// M_STALL ARM1136JS=1

        PKHTB    Out1, Out3, Out0, ASR #8            ;// {U3,  U3,  f2,  f1 }
        MOV      return, #OMX_Sts_NoErr
        PKHTB    Out2, Out3, Out1, ASR #8            ;// {U3,  U3,  U3,  f2 }

        M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
        M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
        M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
        STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDst[15 to 12]
        M_EXIT                                       ;// Macro to exit midway-break from case

DLUpperRightAvailable

        MOV      tVal8,  Above0123, LSR #24          ;// {00,  00,  00,  U3 }
        MOV      tVal9,  Above0123, LSR #16          ;// {00,  00,  U3,  U2 }
        MOV      tVal10, Above0123, LSR #8           ;// {00,  U3,  U2,  U1 }
        ORR      tVal8,  tVal8, Above4567, LSL #8    ;// {U6,  U5,  U4,  U3 }
        ORR      tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
        PKHBT    tVal9,  tVal9, Above4567, LSL #16   ;// {U5,  U4,  U3,  U2 }
        MVN      tVal1,  tVal8                       ;// {U6', U5', U4', U3'}
        MVN      tVal10, tVal10                      ;// {U4', U3', U2', U1'}
        MVN      tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
        UHADD8   tVal6,  Above0123, tVal9            ;// {d3,  d2,  d1,  d0 }
        UHADD8   tVal9,  tVal9, Above4567            ;// {d5,  d4,  d3,  d2 }
        UHADD8   tVal8,  Above4567, tVal8            ;// {d6,  xx,  xx,  xx }
        UHSUB8   tVal6,  tVal6, tVal10               ;// {e3,  e2,  e1,  e0 }
        UHSUB8   tVal12, tVal9, tVal1                ;// {e5,  e4,  e3,  e2 }
        UHSUB8   tVal8,  tVal8, tVal2                ;// {e6,  xx,  xx,  xx }
        UADD8    Out0,   tVal6, r0x80808080          ;// {f3,  f2,  f1,  f0 }
        UADD8    tVal9,  tVal8, r0x80808080          ;// {f6,  xx,  xx,  xx }
        UADD8    Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
        MOV      tVal7,  Out0, LSR #8                ;// {00,  f3,  f2,  f1 }
        AND      tVal9,  tVal9, #0xFF000000          ;// {f6,  00,  00,  00 }
        PKHBT    Out1,   tVal7, Out2, LSL #8         ;// {f4,  f3,  f2,  f1 }
        ORR      Out3,   tVal9, Out2, LSR #8         ;// {f6,  f5,  f4,  f3 }
        M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
        M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
        M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
        STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDst[15 to 12]
        MOV      return, #OMX_Sts_NoErr
        M_EXIT                                       ;// Macro to exit midway-break from case


OMX_VC_4x4_DIAG_DR

        ;// M_STALL ARM1136JS=4

        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
        LDRB     AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
        ORR      tVal7,  Left1,  Left0,  LSL #8      ;// tVal7 = 00 00 L0 L1
        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
        LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080 (r0/pSrcLeft is dead from here on)
        ORR      tVal8,  Left3,  Left2,  LSL #8      ;// tVal8 = 00 00 L2 L3
        PKHBT    tVal7,  tVal8,  tVal7,  LSL #16     ;// tVal7 = L0 L1 L2 L3
        MOV      tVal8,  Above0123,  LSL #8          ;// tVal8 = U2 U1 U0 00
        MOV      tVal9,  tVal7,  LSR #8              ;// tVal9 = 00 L0 L1 L2
        ORR      tVal8,  tVal8,  AboveLeft           ;// tVal8 = U2 U1 U0 UL
        ORR      tVal9,  tVal9,  AboveLeft, LSL #24  ;// tVal9 = UL L0 L1 L2
        MOV      tVal10, Above0123,  LSL #24         ;// tVal10= U0 00 00 00
        UXTB     tVal11, tVal7,  ROR #24             ;// tVal11= 00 00 00 L0
        ORR      tVal10, tVal10, tVal9,  LSR #8      ;// tVal10= U0 UL L0 L1
        ORR      tVal11, tVal11, tVal8,  LSL #8      ;// tVal11= U1 U0 UL L0
        UHADD8   tVal11, Above0123,  tVal11          ;// tVal11= d1 d0 dL g0
        UHADD8   tVal10, tVal7,  tVal10              ;// tVal10= g0 g1 g2 g3
        MVN      tVal8,  tVal8                       ;// tVal8 = U2'U1'U0'UL'
        MVN      tVal9,  tVal9                       ;// tVal9 = UL'L0'L1'L2'
        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
        UHSUB8   tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
        UADD8    Out3,   tVal10, r0x80808080         ;// Out3  = i0 i1 i2 i3
        UADD8    Out0,   tVal11, r0x80808080         ;// Out0  = f1 f0 fL i0
        UXTH     tVal11, Out3,   ROR #8              ;// tVal11= 00 00 i1 i2
        MOV      tVal7,  Out0,   LSL #8              ;// tVal7 = f0 fL i0 00
        ORR      Out1,   tVal7,  tVal11,  LSR #8     ;// Out1  = f0 fL i0 i1
        PKHBT    Out2,   tVal11, Out0,    LSL #16    ;// Out2  = fL i0 i1 i2
        M_STR    Out0,   [pDst], dstStep             ;// store {f1 to i0} at pDst[3  to 0 ]
        M_STR    Out1,   [pDst], dstStep             ;// store {f0 to i1} at pDst[7  to 4 ]
        M_STR    Out2,   [pDst], dstStep             ;// store {fL to i2} at pDst[11 to 8 ]
        STR      Out3,   [pDst]                      ;// store {i0 to i3} at pDst[15 to 12]
        MOV      return,  #OMX_Sts_NoErr
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_VR

        ;// M_STALL ARM1136JS=4

        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0     = 00 00 00 L0
        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1     = 00 00 00 L1
        LDRB     Left2,  [pSrcLeft]                  ;// Left2     = 00 00 00 L2
        MOV      tVal0,  Above0123,  LSL #8          ;// tVal0     = U2 U1 U0 00
        MOV      tVal9,  Above0123                   ;// tVal9     = U3 U2 U1 U0
        ORR      tVal14, tVal0,   AboveLeft          ;// tVal14    = U2 U1 U0 UL
        MVN      tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
        MOV      tVal2,  tVal14,  LSL #8             ;// tVal2     = U1 U0 UL 00
        UHSUB8   tVal1,  Above0123,  tVal11          ;// tVal1     = d2 d1 d0 dL
        UHADD8   tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
        MVN      tVal4,  Left0                       ;// tVal4     = 00 00 00 L0'
        UHSUB8   tVal4,  tVal10,  tVal4              ;// tVal4     = 00 00 00 k1
        ORR      tVal12, tVal0,   Left0              ;// tVal12    = U2 U1 U0 L0
        ORR      tVal14, tVal2,   Left0              ;// tVal14    = U1 U0 UL L0
        LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080 (overwrites tVal0, already consumed)
        UHADD8   tVal10, tVal9,   tVal14             ;// tVal10    = g3 g2 g1 g0
        UADD8    Out0,   tVal1,   r0x80808080        ;// Out0      = e2 e1 e0 eL
        UHSUB8   tVal10, tVal10,  tVal11             ;// tVal10    = h3 h2 h1 h0
        M_STR    Out0,   [pDst],  dstStep            ;// store {e2 to eL} at pDst[3  to 0 ]
        MOV      tVal1,  tVal14,  LSL #8             ;// tVal1     = U0 UL L0 00
        MOV      tVal6,  Out0,    LSL #8             ;// tVal6     = e1 e0 eL 00
        ORR      tVal2,  tVal2,   Left1              ;// tVal2     = U1 U0 UL L1
        UADD8    tVal4,  tVal4,   r0x80808080        ;// tVal4     = 00 00 00 l1
        UADD8    Out1,   tVal10,  r0x80808080        ;// Out1      = i3 i2 i1 i0
        MVN      tVal2,  tVal2                       ;// tVal2     = U1'U0'UL'L1'
        ORR      tVal1,  tVal1,   Left2              ;// tVal1     = U0 UL L0 L2
        ORR      Out2,   tVal6,   tVal4              ;// Out2      = e1 e0 eL l1
        UHADD8   tVal1,  tVal1,   tVal12             ;// tVal1     = g2 g1 g0 j2
        M_STR    Out1,   [pDst],  dstStep            ;// store {i3 to i0} at pDst[7  to 4 ]
        M_STR    Out2,   [pDst],  dstStep            ;// store {e1 to l1} at pDst[11 to 8 ]
        UHSUB8   tVal9,  tVal1,   tVal2              ;// tVal9     = h2 h1 h0 k2
        UADD8    Out3,   tVal9,   r0x80808080        ;// Out3      = i2 i1 i0 l2
        STR      Out3,   [pDst]                      ;// store {i2 to l2} at pDst[15 to 12]
        MOV      return,  #OMX_Sts_NoErr
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_HD

        ;// M_STALL ARM1136JS=4

        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = 00 00 00 L0
        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = 00 00 00 L1
        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = 00 00 00 L2
        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = 00 00 00 L3
        LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080
        ORR      tVal2,  AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
        MVN      tVal1,  Left0                       ;// tVal1 = 00 00 00 L0'
        ORR      tVal4,  Left0,  tVal2,  LSL #8      ;// tVal4 = U1 U0 UL L0
        MVN      tVal2,  tVal2                       ;// tVal2 = U2'U1'U0'UL'
        UHADD8   tVal4,  tVal4,  Above0123           ;// tVal4 = g3 g2 g1 g0
        UHSUB8   tVal1,  AboveLeft,  tVal1           ;// tVal1 = 00 00 00 dL
        UHSUB8   tVal4,  tVal4,  tVal2               ;// tVal4 = h3 h2 h1 h0
        UADD8    tVal1,  tVal1,  r0x80808080         ;// tVal1 = 00 00 00 eL
        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = i3 i2 i1 i0
        ORR      tVal2,  Left0,  AboveLeft,  LSL #16 ;// tVal2 = 00 UL 00 L0
        MOV      tVal4,  tVal4,  LSL #8              ;// tVal4 = i2 i1 i0 00
        ORR      tVal11, Left1,  Left0,  LSL #16     ;// tVal11= 00 L0 00 L1
        ORR      tVal7,  Left2,  Left1,  LSL #16     ;// tVal7 = 00 L1 00 L2
        ORR      tVal10, Left3,  Left2,  LSL #16     ;// tVal10= 00 L2 00 L3
        ORR      Out0,   tVal4,  tVal1               ;// Out0  = i2 i1 i0 eL
        M_STR    Out0,   [pDst], dstStep             ;// store {Out0} at pDst [0  to 3 ]
        MOV      tVal4,  Out0,   LSL #16             ;// tVal4 = i0 eL 00 00
        UHADD8   tVal2,  tVal2,  tVal7               ;// tVal2 = 00 j1 00 j2
        UHADD8   tVal6,  tVal11, tVal10              ;// tVal6 = 00 j2 00 j3
        MVN      tVal12, tVal11                      ;// tVal12= 00 L0'00 L1'
        MVN      tVal14, tVal7                       ;// tVal14= 00 L1'00 L2'
        UHSUB8   tVal2,  tVal2,  tVal12              ;// tVal2 = 00 k1 00 k2
        UHSUB8   tVal8,  tVal7,  tVal12              ;// tVal8 = 00 d1 00 d2
        UHSUB8   tVal11, tVal6,  tVal14              ;// tVal11= 00 k2 00 k3
        UHSUB8   tVal9,  tVal10, tVal14              ;// tVal9 = 00 d2 00 d3
        UADD8    tVal2,  tVal2,  r0x80808080         ;// tVal2 = 00 l1 00 l2
        UADD8    tVal8,  tVal8,  r0x80808080         ;// tVal8 = 00 e1 00 e2
        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
        UADD8    tVal9,  tVal9,  r0x80808080         ;// tVal9 = 00 e2 00 e3
        ORR      Out2,   tVal8,  tVal2,  LSL #8      ;// Out2  = l1 e1 l2 e2
        ORR      Out3,   tVal9,  tVal11, LSL #8      ;// Out3  = l2 e2 l3 e3
        PKHTB    Out1,   tVal4,  Out2,   ASR #16     ;// Out1  = i0 eL l1 e1
        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
        STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]
        MOV      return,  #OMX_Sts_NoErr
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_VL

        ;// M_STALL ARM1136JS=3

        LDMIA    pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
        TST      availability, #OMX_VC_UPPER_RIGHT
        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
        MOV      tVal11, Above0123,  LSR #24         ;// tVal11= 00 00 00 U3
        MULEQ    Above4567, tVal11, r0x01010101      ;// Above4567 = U3 U3 U3 U3 (only when upper-right is unavailable)
        MOV      tVal9,  Above0123,  LSR #8          ;// tVal9 = 00 U3 U2 U1
        MVN      tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
        ORR      tVal2,  tVal9,  Above4567,  LSL #24 ;// tVal2 = U4 U3 U2 U1
        UHSUB8   tVal8,  tVal2,  tVal10              ;// tVal8 = d4 d3 d2 d1
        UADD8    Out0,   tVal8,  r0x80808080         ;// Out0  = e4 e3 e2 e1
        M_STR    Out0,   [pDst], dstStep             ;// store {Out0} at pDst [0  to 3 ]
        MOV      tVal9,  tVal9,  LSR #8              ;// tVal9 = 00 00 U3 U2
        MOV      tVal10, Above4567,  LSL #8          ;// tVal10= U6 U5 U4 00
        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// tVal9 = U5 U4 U3 U2
        ORR      tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
        UHADD8   tVal11, tVal9,  Above0123           ;// tVal11= g5 g4 g3 g2
        UHADD8   tVal14, tVal2,  tVal10              ;// tVal14= g6 g5 g4 g3
        MVN      tVal8,  tVal2                       ;// tVal8 = U4'U3'U2'U1'
        MVN      tVal7,  tVal9                       ;// tVal7 = U5'U4'U3'U2'
        UHSUB8   tVal12, tVal9,  tVal8               ;// tVal12= d5 d4 d3 d2
        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
        UHSUB8   tVal2,  tVal14, tVal7               ;// tVal2 = h6 h5 h4 h3
        UADD8    Out1,   tVal11, r0x80808080         ;// Out1  = i5 i4 i3 i2
        UADD8    Out2,   tVal12, r0x80808080         ;// Out2  = e5 e4 e3 e2
        UADD8    Out3,   tVal2,  r0x80808080         ;// Out3  = i6 i5 i4 i3
        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
        STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15] (last row: no pointer update needed)
        MOV      return, #OMX_Sts_NoErr
        M_EXIT                                       ;// Macro to exit midway-break from case

OMX_VC_4x4_HU

        ;// M_STALL ARM1136JS=2

        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
        MOV      r0x80808080,  r0x01010101, LSL #7   ;// 0x80808080 (derived from 0x01010101, saves a literal load)
        ORR      tVal6,  Left0,  Left1,  LSL #16     ;// tVal6 = 00 L1 00 L0
        ORR      tVal7,  Left1,  Left2,  LSL #16     ;// tVal7 = 00 L2 00 L1
        ORR      tVal11, Left2,  Left3,  LSL #16     ;// tVal11= 00 L3 00 L2
        MUL      Out3,   Left3,  r0x01010101         ;// Out3  = L3 L3 L3 L3
        MVN      tVal8,  tVal7                       ;// tVal8 = 00 L2'00 L1'
        MVN      tVal10, tVal11                      ;// tVal10= 00 L3'00 L2'
        UHADD8   tVal4,  tVal6,  tVal11              ;// tVal4 = 00 g3 00 g2
        UXTB16   tVal12, Out3                        ;// tVal12= 00 L3 00 L3
        UHSUB8   tVal4,  tVal4,  tVal8               ;// tVal4 = 00 h3 00 h2
        UHSUB8   tVal6,  tVal6,  tVal8               ;// tVal6 = 00 d2 00 d1
        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= 00 d3 00 d2
        UHADD8   tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3
        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = 00 i3 00 i2
        UHSUB8   tVal12, tVal12, tVal10              ;// tVal12= 00 h4 00 h3
        UADD8    tVal8,  tVal6,  r0x80808080         ;// tVal8 = 00 e2 00 e1
        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2
        UADD8    tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3
        ORR      Out0,   tVal8,  tVal4,  LSL #8      ;// Out0  = i3 e2 i2 e1
        ORR      Out1,   tVal11, tVal12, LSL #8      ;// Out1  = i4 e3 i3 e2
        M_STR    Out0,   [pDst], dstStep             ;// store {Out0} at pDst [0  to 3 ]
        PKHTB    Out2,   Out3,   Out1,   ASR #16     ;// Out2  = L3 L3 i4 e3
        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
        STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]
        MOV      return,  #OMX_Sts_NoErr
        M_END

        ENDIF ;// ARM1136JS


        END
;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 ends
;//-----------------------------------------------------------------------------------------------