;//
;//
;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;//-----------------------------------------------------------------------------------------------
;// Overview
;//
;// NEON implementation (CortexA8 variant) of armVCM4P10_Interpolate_Chroma.
;//
;// Each output pixel is a weighted sum of a 2x2 neighbourhood of source pixels:
;//
;//     out = (A*x00 + B*x01 + C*x10 + D*x11) >> 6      (rounding, saturating narrow)
;//
;//     A = (8-dx)*(8-dy)   B = dx*(8-dy)   C = (8-dx)*dy   D = dx*dy
;//
;// When dx + dy == 0 a separate set of loops copies source rows to the destination
;// without filtering.  One loop exists per block width (2, 4 or 8), selected through
;// a jump table indexed by iWidth.
;//
;// Syntax: ARM assembler (armasm) with the OpenMAX DL helper macros
;// (M_VARIANTS, M_TABLE, M_START, M_ARG, M_LDRD, M_EXIT, M_END).  These macros are
;// not defined in this file - presumably they come from armCOMM_s.h.
;//-----------------------------------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8


    IF CortexA8

    ;// Jump tables.  The dispatch instruction is "LDR pc, [pTable, iWidth, LSL #1]",
    ;// so the byte offset is iWidth*2, i.e. the word index is iWidth/2:
    ;//     iWidth = 2 -> entry 1,  iWidth = 4 -> entry 2,  iWidth = 8 -> entry 4
    ;// Entries 0 and 3 repeat their neighbour's target.
    ;// NOTE(review): assumes M_TABLE emits the label followed by these DCD words
    ;// contiguously - confirm against the macro definition.

    ;// Targets used when the fractional motion vector is not (0, 0)
    M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero

    DCD   WidthIs2MVIsNotZero, WidthIs2MVIsNotZero
    DCD   WidthIs4MVIsNotZero, WidthIs4MVIsNotZero
    DCD   WidthIs8MVIsNotZero

    ;// Targets used when the fractional motion vector is (0, 0)
    M_TABLE armVCM4P10_WidthBranchTableMVIsZero

    DCD   WidthIs2MVIsZero, WidthIs2MVIsZero
    DCD   WidthIs4MVIsZero, WidthIs4MVIsZero
    DCD   WidthIs8MVIsZero


;// input registers
;// r0-r3 hold the first four arguments; iWidth, iHeight, dx and dy are loaded from
;// the stack arguments Width, Height, Dx and Dy by M_LDRD in the function prologue.

pSrc                 RN 0
iSrcStep             RN 1
pDst                 RN 2
iDstStep             RN 3
iWidth               RN 4
iHeight              RN 5
dx                   RN 6
dy                   RN 7

;// local variable registers
;// Several aliases deliberately share a physical register:
;//   return == pSrc (r0), BCoeff == EightMinusdy (r9),
;//   CCoeff == EightMinusdx (r8), DCoeff == dx (r6).
;// The order of the SMULBB instructions in the prologue depends on this sharing.
pc                   RN 15
return               RN 0
EightMinusdx         RN 8
EightMinusdy         RN 9

ACoeff               RN 12
BCoeff               RN 9
CCoeff               RN 8
DCoeff               RN 6

pTable               RN 11

;// Post-increment amounts for the two loads of each source row:
;// Step1 = 1 and SrcStepMinus1 = iSrcStep - 1, which together advance pSrc by iSrcStep.
;// NOTE(review): SrcStepMinus1 lives in r14 (lr); this relies on M_START/M_EXIT
;// saving and restoring lr - confirm against the macro definition.
Step1                RN 10
SrcStepMinus1        RN 14

;// Filter coefficients, one byte replicated across all eight lanes (set by VDUP)
dACoeff              DN D12.U8
dBCoeff              DN D13.U8
dCCoeff              DN D14.U8
dDCoeff              DN D15.U8

;// Source rows: "a" = 8 bytes loaded at pSrc, "b" = 8 bytes loaded at pSrc + 1
dRow0a               DN D0.U8
dRow0b               DN D1.U8
dRow1a               DN D2.U8
dRow1b               DN D3.U8

;// 16-bit accumulators for output rows 0 and 1
qRow0a               QN Q2.S16
qRow0b               QN Q3.S16

;//dIndex               DN    D16.U8
;// 16-bit accumulators for output rows 2 and 3 (width-8 loop only)
qRow1a               QN Q11.S16
qRow1b               QN Q12.S16

dRow2a               DN D16.U8
dRow2b               DN D17.U8
dRow3a               DN D18.U8
dRow3b               DN D19.U8

;// qOutRow2/qOutRow3 are the same physical registers as qRow1a/qRow1b (Q11/Q12)
qOutRow2             QN Q11.U16
qOutRow3             QN Q12.U16
dOutRow2             DN D20.U8
dOutRow3             DN D21.U8
dOutRow2U64          DN D20.U64
dOutRow3U64          DN D21.U64

;// qOutRow0/qOutRow1 are the same physical registers as qRow0a/qRow0b (Q2/Q3)
qOutRow0             QN Q2.U16
qOutRow1             QN Q3.U16
dOutRow0             DN D8.U8
dOutRow1             DN D9.U8

;// Differently typed views of D8/D9, used to store 8, 4 or 2 bytes per output row
dOutRow0U64          DN D8.U64
dOutRow1U64          DN D9.U64

dOutRow0U32          DN D8.U32
dOutRow1U32          DN D9.U32

dOutRow0U16          DN D8.U16
dOutRow1U16          DN D9.U16


;// Differently typed views of D0..D3 (the source row registers), used by the
;// zero-motion-vector copy loops to store the loaded rows directly
dOut0U64             DN D0.U64
dOut1U64             DN D1.U64

dOut00U32            DN D0.U32
dOut01U32            DN D1.U32
dOut10U32            DN D2.U32
dOut11U32            DN D3.U32

dOut0U16             DN D0.U16
dOut1U16             DN D1.U16

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm starts
;//-----------------------------------------------------------------------------------------------
;//
;// In:     r0 = pSrc, r1 = iSrcStep, r2 = pDst, r3 = iDstStep
;//         stack: Width, Height, Dx, Dy (4 bytes each, declared with M_ARG)
;// Out:    r0 = OMX_Sts_NoErr on every exit path
;// Memory: every VLD1 below loads a full 8-byte D register; each source row is read
;//         twice, at pSrc and at pSrc + 1, in every code path.
;//         NOTE(review): for widths 2 and 4 this reads past the block width -
;//         confirm callers provide readable padding.
;// Height: the width-8 filter loop consumes 4 rows per iteration, every other loop
;//         consumes 2 rows per iteration; each loop repeats while the remaining
;//         height is > 0.
;//         NOTE(review): presumably iHeight is a multiple of the per-iteration row
;//         count and iWidth is 2, 4 or 8 - confirm against callers.

        ;// Write function header
        ;// NOTE(review): "r11, d15" presumably names the highest core and NEON
        ;// registers the macro must preserve - confirm against armCOMM_s.h.
        M_START armVCM4P10_Interpolate_Chroma, r11, d15

        ;// Define stack arguments
        M_ARG   Width,      4
        M_ARG   Height,     4
        M_ARG   Dx,         4
        M_ARG   Dy,         4

        ;// Load argument from the stack
        ;// M_STALL ARM1136JS=4

        M_LDRD   dx, dy, Dx
        M_LDRD   iWidth, iHeight, Width

        ;// EightMinusdx = 8 - dx
        ;// EightMinusdy = 8 - dy

        ;// ACoeff = EightMinusdx * EightMinusdy
        ;// BCoeff = dx * EightMinusdy
        ;// CCoeff = EightMinusdx * dy
        ;// DCoeff = dx * dy

        RSB     EightMinusdx, dx, #8
        RSB     EightMinusdy, dy, #8
        ;// CMN sets the flags from dx + dy: EQ when the sum is zero.  None of the
        ;// instructions between here and the LDREQ/LDRNE pair update the flags.
        ;// NOTE(review): assumes dx and dy are non-negative, so a zero sum means
        ;// both are zero - confirm against callers.
        CMN     dx,dy
        MOV     Step1, #1
        LDREQ   pTable, =armVCM4P10_WidthBranchTableMVIsZero
        SUB     SrcStepMinus1, iSrcStep, Step1
        LDRNE   pTable, =armVCM4P10_WidthBranchTableMVIsNotZero

        ;// Preload row 0 for whichever loop is selected below: 8 bytes at pSrc,
        ;// then 8 bytes at pSrc + 1.  Afterwards pSrc points at row 1.
        VLD1    dRow0a, [pSrc], Step1                   ;// 0a

        ;// Order matters because of register sharing: ACoeff must be computed
        ;// before BCoeff overwrites r9 (EightMinusdy), CCoeff is computed while r7 (dy)
        ;// and r8 (EightMinusdx) are still intact, and DCoeff overwrites r6 (dx) last.
        SMULBB  ACoeff, EightMinusdx, EightMinusdy
        SMULBB  BCoeff, dx, EightMinusdy
        VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b
        SMULBB  CCoeff, EightMinusdx, dy
        SMULBB  DCoeff, dx, dy

        ;// Replicate each coefficient into every byte lane of its D register
        VDUP    dACoeff, ACoeff
        VDUP    dBCoeff, BCoeff
        VDUP    dCCoeff, CCoeff
        VDUP    dDCoeff, DCoeff

        LDR     pc, [pTable, iWidth, LSL #1]      ;// Branch to the case based on iWidth

;// Pixel layout:
;//
;//   x00 x01 x02
;//   x10 x11 x12
;//   x20 x21 x22

;// If fractional mv is not (0, 0)
;//
;// Width 8: four output rows per iteration.  Loads are interleaved with the
;// multiply-accumulates.  On entry dRow0a/dRow0b hold the first source row; the row
;// loaded last in the iteration is placed back in dRow0a/dRow0b so it becomes row 0
;// of the next iteration.  With "Na"/"Nb" denoting row N at pSrc / pSrc + 1:
;//
;//   qRow0a (out row 0) = A*0a + B*0b + C*1a + D*1b
;//   qRow0b (out row 1) = A*1a + B*1b + C*2a + D*2b
;//   qRow1a (out row 2) = A*2a + B*2b + C*3a + D*3b
;//   qRow1b (out row 3) = A*3a + B*3b + C*4a + D*4b    (4a/4b live in dRow0a/dRow0b)
WidthIs8MVIsNotZero

                VLD1   dRow1a, [pSrc], Step1            ;// 1a
                VMULL  qRow0a, dRow0a, dACoeff
                VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// 1b
                VMULL  qRow0b, dRow1a, dACoeff
                VLD1   dRow2a, [pSrc], Step1            ;// 2a
                VMLAL  qRow0a, dRow0b, dBCoeff
                VLD1   dRow2b, [pSrc], SrcStepMinus1    ;// 2b
                VMULL  qRow1a, dRow2a, dACoeff
                VMLAL  qRow0b, dRow1b, dBCoeff
                VLD1   dRow3a, [pSrc], Step1            ;// 3a
                VMLAL  qRow0a, dRow1a, dCCoeff
                VMLAL  qRow1a, dRow2b, dBCoeff
                VMULL  qRow1b, dRow3a, dACoeff
                VLD1   dRow3b, [pSrc], SrcStepMinus1    ;// 3b
                VMLAL  qRow0b, dRow2a, dCCoeff
                VLD1   dRow0a, [pSrc], Step1            ;// 0a
                VMLAL  qRow1b, dRow3b, dBCoeff
                VMLAL  qRow1a, dRow3a, dCCoeff
                VMLAL  qRow0a, dRow1b, dDCoeff
                VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// 0b
                VMLAL  qRow1b, dRow0a, dCCoeff
                VMLAL  qRow0b, dRow2b, dDCoeff
                VMLAL  qRow1a, dRow3b, dDCoeff


                ;// The flags set here are consumed by the BGT at the end of the loop;
                ;// the NEON instructions in between do not alter them.
                SUBS   iHeight, iHeight, #4
                VMLAL  qRow1b, dRow0b, dDCoeff

                ;// Rounding, saturating shift right by 6 and narrow to 8 bits, then
                ;// store 8 bytes per output row.
                VQRSHRN dOutRow0, qOutRow0, #6
                VQRSHRN dOutRow1, qOutRow1, #6
                VQRSHRN dOutRow2, qOutRow2, #6
                VST1   dOutRow0U64, [pDst], iDstStep
                VQRSHRN dOutRow3, qOutRow3, #6

                VST1   dOutRow1U64, [pDst], iDstStep
                VST1   dOutRow2U64, [pDst], iDstStep
                VST1   dOutRow3U64, [pDst], iDstStep


                BGT     WidthIs8MVIsNotZero
                MOV     return,  #OMX_Sts_NoErr
                M_EXIT

;// Width 4: two output rows per iteration.  All eight lanes are filtered, but only
;// the first 32-bit lane (4 bytes) of each result is stored.
;//
;//   qRow0a (out row 0) = A*0a + B*0b + C*1a + D*1b
;//   qRow0b (out row 1) = A*1a + B*1b + C*2a + D*2b    (2a/2b live in dRow0a/dRow0b)
WidthIs4MVIsNotZero

                VLD1   dRow1a, [pSrc], Step1
                VMULL  qRow0a, dRow0a, dACoeff
                VMULL  qRow0b, dRow1a, dACoeff
                VLD1   dRow1b, [pSrc], SrcStepMinus1
                VMLAL  qRow0a, dRow0b, dBCoeff
                VMLAL  qRow0b, dRow1b, dBCoeff
                VLD1   dRow0a, [pSrc], Step1            ;// row 0 of the next iteration
                VMLAL  qRow0a, dRow1a, dCCoeff
                VMLAL  qRow0b, dRow0a, dCCoeff
                VLD1   dRow0b, [pSrc], SrcStepMinus1
                SUBS   iHeight, iHeight, #2             ;// flags consumed by BGT below
                VMLAL  qRow0b, dRow0b, dDCoeff
                VMLAL  qRow0a, dRow1b, dDCoeff

                VQRSHRN dOutRow1, qOutRow1, #6
                VQRSHRN dOutRow0, qOutRow0, #6

                VST1   dOutRow0U32[0], [pDst], iDstStep
                VST1   dOutRow1U32[0], [pDst], iDstStep

                BGT     WidthIs4MVIsNotZero
                MOV     return,  #OMX_Sts_NoErr
                M_EXIT

;// Width 2: same instruction sequence as the width-4 loop, but only the first
;// 16-bit lane (2 bytes) of each result is stored.
WidthIs2MVIsNotZero

                VLD1   dRow1a, [pSrc], Step1
                VMULL  qRow0a, dRow0a, dACoeff
                VMULL  qRow0b, dRow1a, dACoeff
                VLD1   dRow1b, [pSrc], SrcStepMinus1
                VMLAL  qRow0a, dRow0b, dBCoeff
                VMLAL  qRow0b, dRow1b, dBCoeff
                VLD1   dRow0a, [pSrc], Step1            ;// row 0 of the next iteration
                VMLAL  qRow0a, dRow1a, dCCoeff
                VMLAL  qRow0b, dRow0a, dCCoeff
                VLD1   dRow0b, [pSrc], SrcStepMinus1
                SUBS   iHeight, iHeight, #2             ;// flags consumed by BGT below
                VMLAL  qRow0b, dRow0b, dDCoeff
                VMLAL  qRow0a, dRow1b, dDCoeff

                VQRSHRN dOutRow1, qOutRow1, #6
                VQRSHRN dOutRow0, qOutRow0, #6

                VST1   dOutRow0U16[0], [pDst], iDstStep
                VST1   dOutRow1U16[0], [pDst], iDstStep

                BGT     WidthIs2MVIsNotZero
                MOV     return,  #OMX_Sts_NoErr
                M_EXIT

;// If fractional mv is (0, 0)
;//
;// Width 8: plain copy, two rows per iteration.  The prologue already advanced pSrc
;// by one row (Step1 + SrcStepMinus1 = iSrcStep), so rewind it and reload row 0.
WidthIs8MVIsZero
                SUB     pSrc, pSrc, iSrcStep

WidthIs8LoopMVIsZero
                VLD1    dRow0a, [pSrc], iSrcStep
                SUBS    iHeight, iHeight, #2            ;// flags consumed by BGT below
                VLD1    dRow0b, [pSrc], iSrcStep
                VST1    dOut0U64, [pDst], iDstStep      ;// D0 == dRow0a
                VST1    dOut1U64, [pDst], iDstStep      ;// D1 == dRow0b
                BGT     WidthIs8LoopMVIsZero

                MOV     return,  #OMX_Sts_NoErr
                M_EXIT

;// Width 4: plain copy, two rows per iteration.  On entry D0 (dRow0a) already holds
;// row 0 from the prologue and pSrc points at row 1, so no rewind is needed; the
;// second load of each iteration fetches the row stored first by the next iteration.
;// NOTE(review): in the final iteration that second load reads a row that is never
;// stored - confirm the source buffer has at least one readable row past the block.
WidthIs4MVIsZero
                VLD1    dRow0b, [pSrc], iSrcStep

                SUBS    iHeight, iHeight, #2            ;// flags consumed by BGT below

                VST1    dOut00U32[0], [pDst], iDstStep  ;// first 4 bytes of D0
                VLD1    dRow0a, [pSrc], iSrcStep
                VST1    dOut01U32[0], [pDst], iDstStep  ;// first 4 bytes of D1

                BGT     WidthIs4MVIsZero
                MOV     return,  #OMX_Sts_NoErr
                M_EXIT

;// Width 2: same scheme as the width-4 copy loop, storing the first 2 bytes of
;// each row.
;// NOTE(review): as in the width-4 copy loop, the final iteration loads one row that
;// is never stored.
WidthIs2MVIsZero
                VLD1    dRow0b, [pSrc], iSrcStep
                SUBS    iHeight, iHeight, #2            ;// flags consumed by BGT below

                VST1    dOut0U16[0], [pDst], iDstStep   ;// first 2 bytes of D0
                VLD1    dRow0a, [pSrc], iSrcStep
                VST1    dOut1U16[0], [pDst], iDstStep   ;// first 2 bytes of D1

                BGT     WidthIs2MVIsZero
                MOV     return,  #OMX_Sts_NoErr
                M_END

        ENDIF ;// CortexA8

        END

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm ends
;//-----------------------------------------------------------------------------------------------
