;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Define the processor variants supported by this file

        M_VARIANTS ARM1136JS

;//-------------------------------------------------------
;// Jump table implementing a C-style switch in asm with two
;// levels of indexing: predMode selects a word of this table
;// and that word (the address of a case label) is loaded
;// straight into pc. The entry order therefore has to follow
;// the numeric predMode values 0..8.
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pSwitchTable4x4
        DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR
        DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL
        DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR
        DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL
        DCD  OMX_VC_4x4_HU

        IF ARM1136JS

;//--------------------------------------------
;// Constants
;//--------------------------------------------
BLK_SIZE              EQU 0x8                       ;// not referenced in this file
MUL_CONST0            EQU 0x01010101                ;// byte * MUL_CONST0 copies the byte into all 4 lanes
ADD_CONST1            EQU 0x80808080                ;// +128 per byte lane (last step of the rounding trick)

;//--------------------------------------------
;// Scratch variable
;//
;// Many names below share one physical register (for example
;// Out0 / Left0 / tVal6 / predMode are all r6, and
;// r0x80808080 / return / tVal0 / pSrcLeft are all r0).
;// The instruction order inside every case depends on this:
;// a value is always consumed before its alias is written.
;//--------------------------------------------
return          RN 0
pTable          RN 9
pc              RN 15
r0x01010101     RN 1
r0x80808080     RN 0

tVal0           RN 0
tVal1           RN 1
tVal2           RN 2
tVal4           RN 4
tVal6           RN 6
tVal7           RN 7
tVal8           RN 8
tVal9           RN 9
tVal10          RN 10
tVal11          RN 11
tVal12          RN 12
tVal14          RN 14

Out0            RN 6
Out1            RN 7
Out2            RN 8
Out3            RN 9

Left0           RN 6
Left1           RN 7
Left2           RN 8
Left3           RN 9

Above0123       RN 12
Above4567       RN 14

AboveLeft       RN 10

;//--------------------------------------------
;// Declare input registers
;//--------------------------------------------
pSrcLeft        RN 0    ;// input  pointer
pSrcAbove       RN 1    ;// input  pointer
pSrcAboveLeft   RN 2    ;// input  pointer
pDst            RN 3    ;// output pointer
leftStep        RN 4    ;// input  variable
dstStep         RN 5    ;// input  variable
predMode        RN 6    ;// input  variable
availability    RN 7    ;// input  variable

;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 starts
;//-----------------------------------------------------------------------------------------------
;//
;// Writes one 4x4 block of predicted samples (four word stores, dstStep
;// bytes apart) to pDst, using the prediction case selected by predMode.
;//
;// In  (registers): r0 = pSrcLeft, r1 = pSrcAbove, r2 = pSrcAboveLeft, r3 = pDst
;// In  (stack)    : LeftStep, DstStep, PredMode, Availability (4 bytes each),
;//                  loaded below into r4, r5, r6, r7
;// Out            : r0 = OMX_Sts_NoErr on every path
;//
;// Notes
;//  * predMode is NOT range checked: it is used directly as the index of
;//    armVCM4P10_pSwitchTable4x4, so the caller must pass a value in 0..8.
;//  * availability is only examined by the DC, DIAG_DL and VL cases.
;//  * DIAG_DL and VL always read 8 bytes from pSrcAbove (LDMIA); the
;//    OMX_VC_UPPER_RIGHT flag only decides whether bytes 4..7 are used.
;//  * pSrcAbove is read with LDR/LDMIA and pDst is written with STR, i.e.
;//    word accesses. NOTE(review): presumably both must be 4-byte aligned
;//    and the loads are little-endian (U0 in the low byte) - confirm.
;//  * r12 and r14 are used as scratch. NOTE(review): presumably M_START
;//    with "r11" saves r4-r11 and lr and M_EXIT/M_END restore them; the
;//    macros live in armCOMM_s.h and are not visible here - confirm.
;//  * Byte-lane diagrams in the comments list the most significant byte
;//    first. Lanes shown as 00 after MVN / UHSUB8 are intermediate values
;//    (FF / 80) that wrap back to 00 once ADD_CONST1 is added with UADD8.
;//  * Rounding trick used by the directional cases, per byte lane:
;//        (a + b + 1) >> 1      = UHSUB8(a, ~b) + 0x80
;//        (a + 2*b + c + 2) >> 2 = UHSUB8(UHADD8(a, c), ~b) + 0x80

        ;// Write function header
        M_START omxVCM4P10_PredictIntra_4x4, r11

        ;// Define stack arguments
        M_ARG    LeftStep,     4
        M_ARG    DstStep,      4
        M_ARG    PredMode,     4
        M_ARG    Availability, 4

        ;// M_STALL ARM1136JS=4

        LDR      pTable,=armVCM4P10_pSwitchTable4x4 ;// Load index table for switch case

        ;// Load argument from the stack
        M_LDR    predMode, PredMode                 ;// Arg predMode loaded from stack to reg
        M_LDR    leftStep, LeftStep                 ;// Arg leftStep loaded from stack to reg
        M_LDR    dstStep,  DstStep                  ;// Arg dstStep loaded from stack to reg
        M_LDR    availability, Availability         ;// Arg availability loaded from stack to reg

        LDR      pc, [pTable, predMode, LSL #2]     ;// Branch to the case selected by predMode

OMX_VC_4x4_VERT

        LDR     Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
        M_STR   Above0123,  [pDst], dstStep         ;// pDst[0  to 3]  = Above0123
        M_STR   Above0123,  [pDst], dstStep         ;// pDst[4  to 7]  = Above0123
        M_STR   Above0123,  [pDst], dstStep         ;// pDst[8  to 11] = Above0123
        STR     Above0123,  [pDst]                  ;// pDst[12 to 15] = Above0123
        MOV     return, #OMX_Sts_NoErr
        M_EXIT                                      ;// Early return: the "break" of this case

OMX_VC_4x4_HOR

        ;// M_STALL ARM1136JS=6

        LDR     r0x01010101,  =MUL_CONST0           ;// Const to repeat the byte in reg 4 times (reuses r1; pSrcAbove is not needed here)
        M_LDRB  Left0, [pSrcLeft],  leftStep        ;// Left0 = pSrcLeft[0]
        M_LDRB  Left1, [pSrcLeft],  leftStep        ;// Left1 = pSrcLeft[1]
        M_LDRB  Left2, [pSrcLeft],  leftStep        ;// Left2 = pSrcLeft[2]
        LDRB    Left3, [pSrcLeft]                   ;// Left3 = pSrcLeft[3]
        MUL     Out0, Left0, r0x01010101            ;// replicate the val in all the bytes
        MUL     Out1, Left1, r0x01010101            ;// replicate the val in all the bytes
        MUL     Out2, Left2, r0x01010101            ;// replicate the val in all the bytes
        MUL     Out3, Left3, r0x01010101            ;// replicate the val in all the bytes
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [0  to 3 ]
        M_STR   Out1, [pDst], dstStep               ;// store {Out1} at pDst [4  to 7 ]
        M_STR   Out2, [pDst], dstStep               ;// store {Out2} at pDst [8  to 11]
        STR     Out3, [pDst]                        ;// store {Out3} at pDst [12 to 15]
        MOV     return, #OMX_Sts_NoErr
        M_EXIT                                      ;// Early return: the "break" of this case

OMX_VC_4x4_DC

        ;// M_STALL ARM1136JS=6

        ;// Only the UPPER and LEFT flags matter for DC; mask the rest away so
        ;// the exact-match compares below (and in the fall-back labels) work.
        AND     availability, availability, #(OMX_VC_UPPER + OMX_VC_LEFT)
        CMP     availability, #(OMX_VC_UPPER + OMX_VC_LEFT)
        BNE     UpperOrLeftOrNoneAvailable          ;// Not both available: try upper only, left only, none
        LDR     Above0123, [pSrcAbove]              ;// Above0123 = pSrcAbove[0 to 3]

        ;// M_STALL ARM1136JS=1

        UXTB16  tVal7, Above0123                    ;// pSrcAbove[0, 2]
        UXTB16  tVal6, Above0123, ROR #8            ;// pSrcAbove[1, 3]
        UADD16  tVal11, tVal6, tVal7                ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
        M_LDRB  Left0, [pSrcLeft],  leftStep        ;// Left0 = pSrcLeft[0]
        M_LDRB  Left1, [pSrcLeft],  leftStep        ;// Left1 = pSrcLeft[1]
        ADD     tVal11, tVal11, LSR #16             ;// sum(pSrcAbove[0] to pSrcAbove[3])
        M_LDRB  Left2, [pSrcLeft],  leftStep        ;// Left2 = pSrcLeft[2]
        LDRB    Left3, [pSrcLeft]                   ;// Left3 = pSrcLeft[3]
        UXTH    tVal11, tVal11                      ;// upsum1 (Clear the top junk bits)
        ADD     tVal6, Left0, Left1                 ;// tVal6 = Left0 + Left1
        ADD     tVal7, Left2, Left3                 ;// tVal7 = Left2 + Left3
        ADD     tVal6, tVal6, tVal7                 ;// tVal6 = tVal6 + tVal7
        ADD     Out0, tVal6, tVal11                 ;// Out0  = tVal6 + tVal11
        ADD     Out0, Out0, #4                      ;// Out0  = Out0 + 4
        LDR     r0x01010101, =MUL_CONST0            ;// 0x01010101
        MOV     Out0, Out0, LSR #3                  ;// Out0 = (Out0 + 4)>>3

        ;// M_STALL ARM1136JS=1

        MUL     Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV     return,  #OMX_Sts_NoErr
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [0  to 3 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [4  to 7 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [8  to 11]
        STR     Out0, [pDst]                        ;// store {Out0} at pDst [12 to 15]
        M_EXIT                                      ;// Early return: the "break" of this case

UpperOrLeftOrNoneAvailable
        ;// M_STALL ARM1136JS=3

        CMP     availability, #OMX_VC_UPPER         ;// masked availability == upper only ?
        BNE     LeftOrNoneAvailable                 ;// Jump to Left if not upper
        LDR     Above0123, [pSrcAbove]              ;// Above0123 = pSrcAbove[0 to 3]

        ;// M_STALL ARM1136JS=3

        UXTB16  tVal7, Above0123                    ;// pSrcAbove[0, 2]
        UXTB16  tVal6, Above0123, ROR #8            ;// pSrcAbove[1, 3]
        UADD16  Out0, tVal6, tVal7                  ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
        LDR     r0x01010101, =MUL_CONST0            ;// 0x01010101
        ADD     Out0, Out0, LSR #16                 ;// sum(pSrcAbove[0] to pSrcAbove[3])

        ;// M_STALL ARM1136JS=1

        UXTH    Out0, Out0                          ;// upsum1 (Clear the top junk bits)
        ADD     Out0, Out0, #2                      ;// Out0 = Out0 + 2

        ;// M_STALL ARM1136JS=1

        MOV     Out0, Out0, LSR #2                  ;// Out0 = (Out0 + 2)>>2

        ;// M_STALL ARM1136JS=1

        MUL     Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV     return, #OMX_Sts_NoErr
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [0  to 3 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [4  to 7 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [8  to 11]
        STR     Out0, [pDst]                        ;// store {Out0} at pDst [12 to 15]

        M_EXIT                                      ;// Early return: the "break" of this case

LeftOrNoneAvailable
        ;// M_STALL ARM1136JS=3

        LDR     r0x01010101, =MUL_CONST0            ;// 0x01010101 (loaded before the branch: NoneAvailable needs it too)
        CMP     availability, #OMX_VC_LEFT
        BNE     NoneAvailable
        M_LDRB  Left0, [pSrcLeft],  leftStep        ;// Left0 = pSrcLeft[0]
        M_LDRB  Left1, [pSrcLeft],  leftStep        ;// Left1 = pSrcLeft[1]
        M_LDRB  Left2, [pSrcLeft],  leftStep        ;// Left2 = pSrcLeft[2]
        LDRB    Left3, [pSrcLeft]                   ;// Left3 = pSrcLeft[3]
        ADD     Out0, Left0, Left1                  ;// Out0 = Left0 + Left1

        ;// M_STALL ARM1136JS=1

        ADD     Out1, Left2, Left3                  ;// Out1 = Left2 + Left3
        ADD     Out0, Out0, Out1                    ;// Out0 = Out0  + Out1
        ADD     Out0, Out0, #2                      ;// Out0 = Out0 + 2

        ;// M_STALL ARM1136JS=1

        MOV     Out0, Out0, LSR #2                  ;// Out0 = (Out0 + 2)>>2

        ;// M_STALL ARM1136JS=1

        MUL     Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV     return, #OMX_Sts_NoErr
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [0  to 3 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [4  to 7 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [8  to 11]
        STR     Out0, [pDst]                        ;// store {Out0} at pDst [12 to 15]
        M_EXIT                                      ;// Early return: the "break" of this case

NoneAvailable
        MOV     Out0, #128                          ;// Out0 = 128 when neither upper nor left is available

        ;// M_STALL ARM1136JS=5

        MUL     Out0, Out0, r0x01010101             ;// replicate the val in all the bytes

        ;// M_STALL ARM1136JS=1

        MOV     return, #OMX_Sts_NoErr
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [0  to 3 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [4  to 7 ]
        M_STR   Out0, [pDst], dstStep               ;// store {Out0} at pDst [8  to 11]
        STR     Out0, [pDst]                        ;// store {Out0} at pDst [12 to 15]
        M_EXIT                                      ;// Early return: the "break" of this case

OMX_VC_4x4_DIAG_DL

        ;//------------------------------------------------------------------
        ;// f = (a+2*b+c+2)>>2
        ;// Calculate as:
        ;// d = (a + c )>>1
        ;// e = (d - b')>>1
        ;// f = e + 128
        ;//------------------------------------------------------------------

        ;// M_STALL ARM1136JS=3

        TST     availability, #OMX_VC_UPPER_RIGHT   ;// flags survive the two loads below (neither sets flags)
        LDMIA   pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
        LDR     r0x80808080, =ADD_CONST1            ;// 0x80808080
        BNE     DLUpperRightAvailable
        LDR     r0x01010101, =MUL_CONST0            ;// 0x01010101
        MOV     tVal7, Above0123, LSR #24           ;// {00,  00,  00,  U3 }
        MOV     tVal11, tVal7, LSL #24              ;// {U3,  00,  00,  00 }
        MUL     Out3, tVal7, r0x01010101            ;// {U3,  U3,  U3,  U3 }
        MOV     tVal8, Above0123, LSR #16           ;// {00,  00,  U3,  U2 }
        MOV     tVal10, Above0123, LSR #8           ;// {00,  U3,  U2,  U1 }
        MVN     tVal10, tVal10                      ;// {00', U3', U2', U1'}
        UHADD8  tVal8, tVal8, Above0123             ;// {xx,  xx,  d1,  d0 }
        UHADD8  tVal6, Above0123, tVal9             ;// {xx,  d2,  xx,  xx } (tVal9 is Out3 = {U3,U3,U3,U3})
        UHSUB8  tVal8, tVal8, tVal10                ;// {xx,  xx,  e1,  e0 }
        UHSUB8  tVal6, tVal6, tVal10                ;// {xx,  e2,  xx,  xx }
        UADD8   tVal8, tVal8, r0x80808080           ;// {xx,  xx,  f1,  f0 }
        UADD8   tVal6, tVal6, r0x80808080           ;// {xx,  f2,  xx,  xx }

        ;// M_STALL ARM1136JS=1

        PKHBT   tVal6, tVal8, tVal6                 ;// {xx,  f2,  f1,  f0 }
        BIC     tVal6, tVal6, #0xFF000000           ;// {00,  f2,  f1,  f0 }
        ORR     Out0, tVal6, tVal11                 ;// {U3,  f2,  f1,  f0 }

        ;// M_STALL ARM1136JS=1

        PKHTB   Out1, Out3, Out0, ASR #8            ;// {U3,  U3,  f2,  f1 }
        MOV     return, #OMX_Sts_NoErr              ;// r0 held 0x80808080 until here; safe to overwrite now
        PKHTB   Out2, Out3, Out1, ASR #8            ;// {U3,  U3,  U3,  f2 }

        M_STR   Out0, [pDst], dstStep               ;// store {U3, f2, f1, f0} at pDst[3  to 0 ]
        M_STR   Out1, [pDst], dstStep               ;// store {U3, U3, f2, f1} at pDst[7  to 4 ]
        M_STR   Out2, [pDst], dstStep               ;// store {U3, U3, U3, f2} at pDst[11 to 8 ]
        STR     Out3, [pDst]                        ;// store {U3, U3, U3, U3} at pDst[15 to 12]
        M_EXIT                                      ;// Early return: the "break" of this case

DLUpperRightAvailable

        MOV     tVal8,  Above0123, LSR #24          ;// {00,  00,  00,  U3 }
        MOV     tVal9,  Above0123, LSR #16          ;// {00,  00,  U3,  U2 }
        MOV     tVal10, Above0123, LSR #8           ;// {00,  U3,  U2,  U1 }
        ORR     tVal8,  tVal8, Above4567, LSL #8    ;// {U6,  U5,  U4,  U3 }
        ORR     tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
        PKHBT   tVal9,  tVal9, Above4567, LSL #16   ;// {U5,  U4,  U3,  U2 }
        MVN     tVal1,  tVal8                       ;// {U6', U5', U4', U3'}
        MVN     tVal10, tVal10                      ;// {U4', U3', U2', U1'}
        MVN     tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
        UHADD8  tVal6,  Above0123, tVal9            ;// {d3,  d2,  d1,  d0 }
        UHADD8  tVal9,  tVal9, Above4567            ;// {d5,  d4,  d3,  d2 }
        UHADD8  tVal8,  Above4567, tVal8            ;// {d6,  xx,  xx,  xx }
        UHSUB8  tVal6,  tVal6, tVal10               ;// {e3,  e2,  e1,  e0 }
        UHSUB8  tVal12, tVal9, tVal1                ;// {e5,  e4,  e3,  e2 }
        UHSUB8  tVal8,  tVal8, tVal2                ;// {e6,  xx,  xx,  xx }
        UADD8   Out0,   tVal6, r0x80808080          ;// {f3,  f2,  f1,  f0 }
        UADD8   tVal9,  tVal8, r0x80808080          ;// {f6,  xx,  xx,  xx }
        UADD8   Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
        MOV     tVal7,  Out0, LSR #8                ;// {00,  f3,  f2,  f1 }
        AND     tVal9,  tVal9, #0xFF000000          ;// {f6,  00,  00,  00 }
        PKHBT   Out1,   tVal7, Out2, LSL #8         ;// {f4,  f3,  f2,  f1 }
        ORR     Out3,   tVal9, Out2, LSR #8         ;// {f6,  f5,  f4,  f3 }
        M_STR   Out0, [pDst], dstStep               ;// store {f3 to f0} at pDst[3  to 0 ]
        M_STR   Out1, [pDst], dstStep               ;// store {f4 to f1} at pDst[7  to 4 ]
        M_STR   Out2, [pDst], dstStep               ;// store {f5 to f2} at pDst[11 to 8 ]
        STR     Out3, [pDst]                        ;// store {f6 to f3} at pDst[15 to 12]
        MOV     return, #OMX_Sts_NoErr
        M_EXIT                                      ;// Early return: the "break" of this case


OMX_VC_4x4_DIAG_DR

        ;// M_STALL ARM1136JS=4

        M_LDRB  Left0, [pSrcLeft],  leftStep        ;// Left0 = pSrcLeft[0]
        M_LDRB  Left1, [pSrcLeft],  leftStep        ;// Left1 = pSrcLeft[1]
        M_LDRB  Left2, [pSrcLeft],  leftStep        ;// Left2 = pSrcLeft[2]
        LDRB    Left3, [pSrcLeft]                   ;// Left3 = pSrcLeft[3]
        LDRB    AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
        ORR     tVal7, Left1, Left0, LSL #8         ;// tVal7 = 00 00 L0 L1
        LDR     Above0123, [pSrcAbove]              ;// Above0123 = U3 U2 U1 U0
        LDR     r0x80808080, =ADD_CONST1            ;// 0x80808080 (r0: pSrcLeft is no longer needed)
        ORR     tVal8, Left3, Left2, LSL #8         ;// tVal8 = 00 00 L2 L3
        PKHBT   tVal7, tVal8, tVal7, LSL #16        ;// tVal7 = L0 L1 L2 L3
        MOV     tVal8, Above0123, LSL #8            ;// tVal8 = U2 U1 U0 00
        MOV     tVal9, tVal7, LSR #8                ;// tVal9 = 00 L0 L1 L2
        ORR     tVal8, tVal8, AboveLeft             ;// tVal8 = U2 U1 U0 UL
        ORR     tVal9, tVal9, AboveLeft, LSL #24    ;// tVal9 = UL L0 L1 L2
        MOV     tVal10, Above0123, LSL #24          ;// tVal10= U0 00 00 00
        UXTB    tVal11, tVal7, ROR #24              ;// tVal11= 00 00 00 L0
        ORR     tVal10, tVal10, tVal9, LSR #8       ;// tVal10= U0 UL L0 L1
        ORR     tVal11, tVal11, tVal8, LSL #8       ;// tVal11= U1 U0 UL L0
        UHADD8  tVal11, Above0123, tVal11           ;// tVal11= d1 d0 dL g0
        UHADD8  tVal10, tVal7, tVal10               ;// tVal10= g0 g1 g2 g3
        MVN     tVal8, tVal8                        ;// tVal8 = U2'U1'U0'UL'
        MVN     tVal9, tVal9                        ;// tVal9 = UL'L0'L1'L2'
        UHSUB8  tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
        UHSUB8  tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
        UADD8   Out3, tVal10, r0x80808080           ;// Out3  = i0 i1 i2 i3
        UADD8   Out0, tVal11, r0x80808080           ;// Out0  = f1 f0 fL i0
        UXTH    tVal11, Out3, ROR #8                ;// tVal11= 00 00 i1 i2
        MOV     tVal7, Out0, LSL #8                 ;// tVal7 = f0 fL i0 00
        ORR     Out1, tVal7, tVal11, LSR #8         ;// Out1  = f0 fL i0 i1
        PKHBT   Out2, tVal11, Out0, LSL #16         ;// Out2  = fL i0 i1 i2
        M_STR   Out0, [pDst], dstStep               ;// store {f1 to i0} at pDst[3  to 0 ]
        M_STR   Out1, [pDst], dstStep               ;// store {f0 to i1} at pDst[7  to 4 ]
        M_STR   Out2, [pDst], dstStep               ;// store {fL to i2} at pDst[11 to 8 ]
        STR     Out3, [pDst]                        ;// store {i0 to i3} at pDst[15 to 12]
        MOV     return,  #OMX_Sts_NoErr
        M_EXIT                                      ;// Early return: the "break" of this case

OMX_VC_4x4_VR

        ;// M_STALL ARM1136JS=4

        LDR     Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
        LDRB    AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
        M_LDRB  Left0, [pSrcLeft],  leftStep        ;// Left0     = 00 00 00 L0
        M_LDRB  Left1, [pSrcLeft],  leftStep        ;// Left1     = 00 00 00 L1
        LDRB    Left2, [pSrcLeft]                   ;// Left2     = 00 00 00 L2
        MOV     tVal0, Above0123, LSL #8            ;// tVal0     = U2 U1 U0 00
        MOV     tVal9, Above0123                    ;// tVal9     = U3 U2 U1 U0 (copy: r12 is reused as tVal12 below)
        ORR     tVal14, tVal0, AboveLeft            ;// tVal14    = U2 U1 U0 UL
        MVN     tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
        MOV     tVal2, tVal14, LSL #8               ;// tVal2     = U1 U0 UL 00
        UHSUB8  tVal1, Above0123, tVal11            ;// tVal1     = d2 d1 d0 dL
        UHADD8  tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
        MVN     tVal4, Left0                        ;// tVal4     = 00 00 00 L0'
        UHSUB8  tVal4, tVal10, tVal4                ;// tVal4     = 00 00 00 k1
        ORR     tVal12, tVal0, Left0                ;// tVal12    = U2 U1 U0 L0
        ORR     tVal14, tVal2, Left0                ;// tVal14    = U1 U0 UL L0
        LDR     r0x80808080, =ADD_CONST1            ;// 0x80808080 (r0: tVal0 is dead from here on)
        UHADD8  tVal10, tVal9, tVal14               ;// tVal10    = g3 g2 g1 g0
        UADD8   Out0, tVal1, r0x80808080            ;// Out0      = e2 e1 e0 eL
        UHSUB8  tVal10, tVal10, tVal11              ;// tVal10    = h3 h2 h1 h0
        M_STR   Out0, [pDst], dstStep               ;// store {e2 to eL} at pDst[3  to 0 ]
        MOV     tVal1, tVal14, LSL #8               ;// tVal1     = U0 UL L0 00
        MOV     tVal6, Out0, LSL #8                 ;// tVal6     = e1 e0 eL 00
        ORR     tVal2, tVal2, Left1                 ;// tVal2     = U1 U0 UL L1
        UADD8   tVal4, tVal4, r0x80808080           ;// tVal4     = 00 00 00 l1
        UADD8   Out1, tVal10, r0x80808080           ;// Out1      = i3 i2 i1 i0
        MVN     tVal2, tVal2                        ;// tVal2     = U1'U0'UL'L1'
        ORR     tVal1, tVal1, Left2                 ;// tVal1     = U0 UL L0 L2
        ORR     Out2, tVal6, tVal4                  ;// Out2      = e1 e0 eL l1
        UHADD8  tVal1, tVal1, tVal12                ;// tVal1     = g2 g1 g0 j2
        M_STR   Out1, [pDst], dstStep               ;// store {i3 to i0} at pDst[7  to 4 ]
        M_STR   Out2, [pDst], dstStep               ;// store {e1 to l1} at pDst[11 to 8 ]
        UHSUB8  tVal9, tVal1, tVal2                 ;// tVal9     = h2 h1 h0 k2
        UADD8   Out3, tVal9, r0x80808080            ;// Out3      = i2 i1 i0 l2
        STR     Out3, [pDst]                        ;// store {i2 to l2} at pDst[15 to 12]
        MOV     return,  #OMX_Sts_NoErr
        M_EXIT                                      ;// Early return: the "break" of this case

OMX_VC_4x4_HD

        ;// M_STALL ARM1136JS=4

        LDR     Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
        LDRB    AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
        M_LDRB  Left0, [pSrcLeft],  leftStep        ;// Left0 = 00 00 00 L0
        M_LDRB  Left1, [pSrcLeft],  leftStep        ;// Left1 = 00 00 00 L1
        M_LDRB  Left2, [pSrcLeft],  leftStep        ;// Left2 = 00 00 00 L2
        LDRB    Left3, [pSrcLeft]                   ;// Left3 = 00 00 00 L3
        LDR     r0x80808080, =ADD_CONST1            ;// 0x80808080
        ORR     tVal2, AboveLeft, Above0123, LSL #8 ;// tVal2 = U2 U1 U0 UL
        MVN     tVal1, Left0                        ;// tVal1 = 00 00 00 L0'
        ORR     tVal4, Left0, tVal2, LSL #8         ;// tVal4 = U1 U0 UL L0
        MVN     tVal2, tVal2                        ;// tVal2 = U2'U1'U0'UL'
        UHADD8  tVal4, tVal4, Above0123             ;// tVal4 = g3 g2 g1 g0
        UHSUB8  tVal1, AboveLeft, tVal1             ;// tVal1 = 00 00 00 dL
        UHSUB8  tVal4, tVal4, tVal2                 ;// tVal4 = h3 h2 h1 h0
        UADD8   tVal1, tVal1, r0x80808080           ;// tVal1 = 00 00 00 eL
        UADD8   tVal4, tVal4, r0x80808080           ;// tVal4 = i3 i2 i1 i0
        ORR     tVal2, Left0, AboveLeft, LSL #16    ;// tVal2 = 00 UL 00 L0
        MOV     tVal4, tVal4, LSL #8                ;// tVal4 = i2 i1 i0 00
        ORR     tVal11, Left1, Left0, LSL #16       ;// tVal11= 00 L0 00 L1
        ORR     tVal7, Left2, Left1, LSL #16        ;// tVal7 = 00 L1 00 L2
        ORR     tVal10, Left3, Left2, LSL #16       ;// tVal10= 00 L2 00 L3
        ORR     Out0, tVal4, tVal1                  ;// Out0  = i2 i1 i0 eL
        M_STR   Out0, [pDst], dstStep               ;// store {Out0}  at pDst [0  to 3 ]
        MOV     tVal4, Out0, LSL #16                ;// tVal4 = i0 eL 00 00
        UHADD8  tVal2, tVal2, tVal7                 ;// tVal2 = 00 j1 00 j2
        UHADD8  tVal6, tVal11, tVal10               ;// tVal6 = 00 j2 00 j3
        MVN     tVal12, tVal11                      ;// tVal12= 00 L0'00 L1'
        MVN     tVal14, tVal7                       ;// tVal14= 00 L1'00 L2'
        UHSUB8  tVal2, tVal2, tVal12                ;// tVal2 = 00 k1 00 k2
        UHSUB8  tVal8, tVal7, tVal12                ;// tVal8 = 00 d1 00 d2
        UHSUB8  tVal11, tVal6, tVal14               ;// tVal11= 00 k2 00 k3
        UHSUB8  tVal9, tVal10, tVal14               ;// tVal9 = 00 d2 00 d3
        UADD8   tVal2, tVal2, r0x80808080           ;// tVal2 = 00 l1 00 l2
        UADD8   tVal8, tVal8, r0x80808080           ;// tVal8 = 00 e1 00 e2
        UADD8   tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
        UADD8   tVal9, tVal9, r0x80808080           ;// tVal9 = 00 e2 00 e3
        ORR     Out2, tVal8, tVal2, LSL #8          ;// Out2  = l1 e1 l2 e2
        ORR     Out3, tVal9, tVal11, LSL #8         ;// Out3  = l2 e2 l3 e3
        PKHTB   Out1, tVal4, Out2, ASR #16          ;// Out1  = i0 eL l1 e1
        M_STR   Out1, [pDst], dstStep               ;// store {Out1}  at pDst [4  to 7 ]
        M_STR   Out2, [pDst], dstStep               ;// store {Out2}  at pDst [8  to 11]
        STR     Out3, [pDst]                        ;// store {Out3}  at pDst [12 to 15]
        MOV     return,  #OMX_Sts_NoErr
        M_EXIT                                      ;// Early return: the "break" of this case

OMX_VC_4x4_VL

        ;// M_STALL ARM1136JS=3

        LDMIA   pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
        TST     availability, #OMX_VC_UPPER_RIGHT   ;// flags are consumed by the MULEQ below
        LDR     r0x80808080, =ADD_CONST1            ;// 0x80808080
        LDR     r0x01010101, =MUL_CONST0            ;// 0x01010101
        MOV     tVal11, Above0123, LSR #24          ;// tVal11= 00 00 00 U3
        MULEQ   Above4567, tVal11, r0x01010101      ;// Upper-right unavailable: Above4567 = U3 U3 U3 U3
        MOV     tVal9, Above0123, LSR #8            ;// tVal9 = 00 U3 U2 U1
        MVN     tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
        ORR     tVal2, tVal9, Above4567, LSL #24    ;// tVal2 = U4 U3 U2 U1
        UHSUB8  tVal8, tVal2, tVal10                ;// tVal8 = d4 d3 d2 d1
        UADD8   Out0, tVal8, r0x80808080            ;// Out0 = e4 e3 e2 e1
        M_STR   Out0, [pDst], dstStep               ;// store {Out0}  at pDst [0  to 3 ]
        MOV     tVal9, tVal9, LSR #8                ;// tVal9 = 00 00 U3 U2
        MOV     tVal10, Above4567, LSL #8           ;// tVal10= U6 U5 U4 00
        PKHBT   tVal9, tVal9, Above4567, LSL #16    ;// tVal9 = U5 U4 U3 U2
        ORR     tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
        UHADD8  tVal11, tVal9, Above0123            ;// tVal11= g5 g4 g3 g2
        UHADD8  tVal14, tVal2, tVal10               ;// tVal14= g6 g5 g4 g3
        MVN     tVal8, tVal2                        ;// tVal8 = U4'U3'U2'U1'
        MVN     tVal7, tVal9                        ;// tVal7 = U5'U4'U3'U2'
        UHSUB8  tVal12, tVal9, tVal8                ;// tVal12= d5 d4 d3 d2
        UHSUB8  tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
        UHSUB8  tVal2, tVal14, tVal7                ;// tVal2 = h6 h5 h4 h3
        UADD8   Out1, tVal11, r0x80808080           ;// Out1  = i5 i4 i3 i2
        UADD8   Out2, tVal12, r0x80808080           ;// Out2  = e5 e4 e3 e2
        UADD8   Out3, tVal2, r0x80808080            ;// Out3  = i6 i5 i4 i3
        M_STR   Out1, [pDst], dstStep               ;// store {Out1}  at pDst [4  to 7 ]
        M_STR   Out2, [pDst], dstStep               ;// store {Out2}  at pDst [8  to 11]
        STR     Out3, [pDst]                        ;// store {Out3}  at pDst [12 to 15] (last row: no write-back, as in the other cases)
        MOV     return, #OMX_Sts_NoErr
        M_EXIT                                      ;// Early return: the "break" of this case

OMX_VC_4x4_HU

        ;// M_STALL ARM1136JS=2

        LDR     r0x01010101, =MUL_CONST0            ;// 0x01010101
        M_LDRB  Left0, [pSrcLeft],  leftStep        ;// Left0 = pSrcLeft[0]
        M_LDRB  Left1, [pSrcLeft],  leftStep        ;// Left1 = pSrcLeft[1]
        M_LDRB  Left2, [pSrcLeft],  leftStep        ;// Left2 = pSrcLeft[2]
        LDRB    Left3, [pSrcLeft]                   ;// Left3 = pSrcLeft[3]
        MOV     r0x80808080, r0x01010101, LSL #7    ;// 0x80808080 (derived from r1 instead of a second literal load)
        ORR     tVal6, Left0, Left1, LSL #16        ;// tVal6 = 00 L1 00 L0
        ORR     tVal7, Left1, Left2, LSL #16        ;// tVal7 = 00 L2 00 L1
        ORR     tVal11, Left2, Left3, LSL #16       ;// tVal11= 00 L3 00 L2
        MUL     Out3, Left3, r0x01010101            ;// Out3  = L3 L3 L3 L3
        MVN     tVal8, tVal7                        ;// tVal8 = 00 L2'00 L1'
        MVN     tVal10, tVal11                      ;// tVal10= 00 L3'00 L2'
        UHADD8  tVal4, tVal6, tVal11                ;// tVal4 = 00 g3 00 g2
        UXTB16  tVal12, Out3                        ;// tVal12= 00 L3 00 L3
        UHSUB8  tVal4, tVal4, tVal8                 ;// tVal4 = 00 h3 00 h2
        UHSUB8  tVal6, tVal6, tVal8                 ;// tVal6 = 00 d2 00 d1
        UHSUB8  tVal11, tVal11, tVal8               ;// tVal11= 00 d3 00 d2
        UHADD8  tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3
        UADD8   tVal4, tVal4, r0x80808080           ;// tVal4 = 00 i3 00 i2
        UHSUB8  tVal12, tVal12, tVal10              ;// tVal12= 00 h4 00 h3
        UADD8   tVal8, tVal6, r0x80808080           ;// tVal8 = 00 e2 00 e1
        UADD8   tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2
        UADD8   tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3
        ORR     Out0, tVal8, tVal4, LSL #8          ;// Out0  = i3 e2 i2 e1
        ORR     Out1, tVal11, tVal12, LSL #8        ;// Out1  = i4 e3 i3 e2
        M_STR   Out0, [pDst], dstStep               ;// store {Out0}  at pDst [0  to 3 ]
        PKHTB   Out2, Out3, Out1, ASR #16           ;// Out2  = L3 L3 i4 e3
        M_STR   Out1, [pDst], dstStep               ;// store {Out1}  at pDst [4  to 7 ]
        M_STR   Out2, [pDst], dstStep               ;// store {Out2}  at pDst [8  to 11]
        STR     Out3, [pDst]                        ;// store {Out3}  at pDst [12 to 15]
        MOV     return,  #OMX_Sts_NoErr
        M_END

        ENDIF ;// ARM1136JS


        END
;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 ends
;//-----------------------------------------------------------------------------------------------