;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

;//-----------------------------------------------------------------------------------------------
;// Overview
;//
;//   Entry point : armVCM4P10_Interpolate_Chroma
;//   Registers   : r0 = pSrc, r1 = iSrcStep, r2 = pDst, r3 = iDstStep
;//   Stack       : Width, Height, Dx, Dy (declared below as four 4-byte arguments)
;//   Result      : r0 = OMX_Sts_NoErr on every exit path
;//
;//   With
;//       A = (8 - dx) * (8 - dy)      B = dx * (8 - dy)
;//       C = (8 - dx) * dy            D = dx * dy
;//   every output byte is the 16-bit sum  A*x00 + B*x01 + C*x10 + D*x11  narrowed
;//   with VQRSHRN #6, where x00/x01 are horizontally adjacent source bytes and
;//   x10/x11 are the bytes one source row (iSrcStep) below them.
;//
;//   When dx + dy is zero the weighted sum is skipped and source rows are copied
;//   straight to the destination.
;//
;//   The block width selects one of three loops through a jump table. Every
;//   source access is a full 8-byte D-register load irrespective of the width;
;//   only the store size (16, 32 or 64 bits) depends on it.
;//-----------------------------------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8


    IF CortexA8

;// Jump tables of code addresses. They are read with a byte offset of
;// iWidth * 2, so widths 2, 4 and 8 pick the words at offsets 4, 8 and 16.
;// The words at offsets 0 and 12 only pad the table out to those positions.

        ;// Targets used when dx + dy is non-zero
        M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero

        DCD     WidthIs2MVIsNotZero         ;// offset  0 (padding)
        DCD     WidthIs2MVIsNotZero         ;// offset  4 : width 2
        DCD     WidthIs4MVIsNotZero         ;// offset  8 : width 4
        DCD     WidthIs4MVIsNotZero         ;// offset 12 (padding)
        DCD     WidthIs8MVIsNotZero         ;// offset 16 : width 8

        ;// Targets used when dx + dy is zero
        M_TABLE armVCM4P10_WidthBranchTableMVIsZero

        DCD     WidthIs2MVIsZero            ;// offset  0 (padding)
        DCD     WidthIs2MVIsZero            ;// offset  4 : width 2
        DCD     WidthIs4MVIsZero            ;// offset  8 : width 4
        DCD     WidthIs4MVIsZero            ;// offset 12 (padding)
        DCD     WidthIs8MVIsZero            ;// offset 16 : width 8


;//-----------------------------------------------------------------------------------------------
;// Core register aliases
;//-----------------------------------------------------------------------------------------------

;// Arguments: r0-r3 arrive in registers, r4-r7 are filled from the stack
pSrc            RN 0
iSrcStep        RN 1
pDst            RN 2
iDstStep        RN 3
iWidth          RN 4
iHeight         RN 5
dx              RN 6
dy              RN 7

;// Locals
pc              RN 15               ;// written by the table dispatch
return          RN 0                ;// status code, shares r0 with pSrc
EightMinusdx    RN 8
EightMinusdy    RN 9

;// Scalar filter weights. BCoeff, CCoeff and DCoeff deliberately reuse the
;// registers of EightMinusdy, EightMinusdx and dx, which is why the order of
;// the four multiplies in the function body must not be changed.
ACoeff          RN 12
BCoeff          RN 9
CCoeff          RN 8
DCoeff          RN 6

pTable          RN 11               ;// address of the selected jump table

Step1           RN 10               ;// constant 1: post-increment to the "+1 column" load
SrcStepMinus1   RN 14               ;// iSrcStep - 1: post-increment to the next row start

;//-----------------------------------------------------------------------------------------------
;// NEON register aliases
;//-----------------------------------------------------------------------------------------------

;// Filter weights replicated into every byte lane
dACoeff         DN D12.U8
dBCoeff         DN D13.U8
dCCoeff         DN D14.U8
dDCoeff         DN D15.U8

;// Source rows: suffix "a" is loaded at the row pointer, "b" one byte further on
dRow0a          DN D0.U8
dRow0b          DN D1.U8
dRow1a          DN D2.U8
dRow1b          DN D3.U8

;// 16-bit accumulators for output rows 0 and 1
qRow0a          QN Q2.S16
qRow0b          QN Q3.S16

;//dIndex       DN D16.U8
;// 16-bit accumulators for output rows 2 and 3 (width-8 loop only)
qRow1a          QN Q11.S16
qRow1b          QN Q12.S16

dRow2a          DN D16.U8
dRow2b          DN D17.U8
dRow3a          DN D18.U8
dRow3b          DN D19.U8

;// Unsigned views of the row 2/3 accumulators and their narrowed results
qOutRow2        QN Q11.U16
qOutRow3        QN Q12.U16
dOutRow2        DN D20.U8
dOutRow3        DN D21.U8
dOutRow2U64     DN D20.U64
dOutRow3U64     DN D21.U64

;// Unsigned views of the row 0/1 accumulators and their narrowed results
qOutRow0        QN Q2.U16
qOutRow1        QN Q3.U16
dOutRow0        DN D8.U8
dOutRow1        DN D9.U8

;// The same result registers typed for 64-, 32- and 16-bit stores
dOutRow0U64     DN D8.U64
dOutRow1U64     DN D9.U64

dOutRow0U32     DN D8.U32
dOutRow1U32     DN D9.U32

dOutRow0U16     DN D8.U16
dOutRow1U16     DN D9.U16

;// Source-row registers typed for direct stores in the copy paths
dOut0U64        DN D0.U64
dOut1U64        DN D1.U64

dOut00U32       DN D0.U32
dOut01U32       DN D1.U32
dOut10U32       DN D2.U32
dOut11U32       DN D3.U32

dOut0U16        DN D0.U16
dOut1U16        DN D1.U16

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm starts
;//-----------------------------------------------------------------------------------------------

        ;// Function header. NOTE(review): M_START, M_ARG, M_LDRD, M_EXIT and M_END
        ;// are macros from armCOMM_s.h; "r11, d15" presumably names the highest
        ;// core/NEON registers to preserve - confirm against that header.
        M_START armVCM4P10_Interpolate_Chroma, r11, d15

        ;// Stack-passed arguments, 4 bytes each
        M_ARG   Width,  4
        M_ARG   Height, 4
        M_ARG   Dx,     4
        M_ARG   Dy,     4

        ;// Fetch the stack arguments as two register pairs
        ;// (disabled in the original: M_STALL ARM1136JS=4)

        M_LDRD  dx, dy, Dx
        M_LDRD  iWidth, iHeight, Width

        ;// EightMinusdx = 8 - dx
        ;// EightMinusdy = 8 - dy

        ;// ACoeff = EightMinusdx * EightMinusdy
        ;// BCoeff = dx * EightMinusdy
        ;// CCoeff = EightMinusdx * dy
        ;// DCoeff = dx * dy

        RSB     EightMinusdx, dx, #8
        RSB     EightMinusdy, dy, #8
        CMN     dx, dy                                  ;// Z is set when dx + dy == 0
        MOV     Step1, #1
        LDREQ   pTable, =armVCM4P10_WidthBranchTableMVIsZero
        SUB     SrcStepMinus1, iSrcStep, Step1
        LDRNE   pTable, =armVCM4P10_WidthBranchTableMVIsNotZero

        ;// Row 0 is fetched here, ahead of the dispatch, so every loop below
        ;// starts with dRow0a/dRow0b already valid and pSrc at the second row.
        VLD1    dRow0a, [pSrc], Step1                   ;// 0a

        ;// ACoeff has to be formed first: the next three products overwrite
        ;// EightMinusdy (r9), EightMinusdx (r8) and dx (r6) in turn.
        SMULBB  ACoeff, EightMinusdx, EightMinusdy
        SMULBB  BCoeff, dx, EightMinusdy
        VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b
        SMULBB  CCoeff, EightMinusdx, dy
        SMULBB  DCoeff, dx, dy

        ;// Copy each weight into all lanes of its D register
        VDUP    dACoeff, ACoeff
        VDUP    dBCoeff, BCoeff
        VDUP    dCCoeff, CCoeff
        VDUP    dDCoeff, DCoeff

        LDR     pc, [pTable, iWidth, LSL #1]            ;// Jump to the loop for this width

        ;// Pixel layout:
        ;//
        ;//   x00 x01 x02
        ;//   x10 x11 x12
        ;//   x20 x21 x22

;//-----------------------------------------------------------------------------------------------
;// Fractional mv is not (0, 0)
;//-----------------------------------------------------------------------------------------------

;// Width 8: four output rows per pass. Source rows 1-3 are loaded here, and the
;// pass ends by loading the row that becomes row 0 of the next pass. Loads and
;// multiply-accumulates are interleaved; the instruction order is kept as is.
WidthIs8MVIsNotZero

        VLD1    dRow1a, [pSrc], Step1                   ;// 1a
        VMULL   qRow0a, dRow0a, dACoeff                 ;// out0  = A * row0
        VLD1    dRow1b, [pSrc], SrcStepMinus1           ;// 1b
        VMULL   qRow0b, dRow1a, dACoeff                 ;// out1  = A * row1
        VLD1    dRow2a, [pSrc], Step1                   ;// 2a
        VMLAL   qRow0a, dRow0b, dBCoeff                 ;// out0 += B * row0(+1)
        VLD1    dRow2b, [pSrc], SrcStepMinus1           ;// 2b
        VMULL   qRow1a, dRow2a, dACoeff                 ;// out2  = A * row2
        VMLAL   qRow0b, dRow1b, dBCoeff                 ;// out1 += B * row1(+1)
        VLD1    dRow3a, [pSrc], Step1                   ;// 3a
        VMLAL   qRow0a, dRow1a, dCCoeff                 ;// out0 += C * row1
        VMLAL   qRow1a, dRow2b, dBCoeff                 ;// out2 += B * row2(+1)
        VMULL   qRow1b, dRow3a, dACoeff                 ;// out3  = A * row3
        VLD1    dRow3b, [pSrc], SrcStepMinus1           ;// 3b
        VMLAL   qRow0b, dRow2a, dCCoeff                 ;// out1 += C * row2
        VLD1    dRow0a, [pSrc], Step1                   ;// 0a (row 4 = next pass row 0)
        VMLAL   qRow1b, dRow3b, dBCoeff                 ;// out3 += B * row3(+1)
        VMLAL   qRow1a, dRow3a, dCCoeff                 ;// out2 += C * row3
        VMLAL   qRow0a, dRow1b, dDCoeff                 ;// out0 += D * row1(+1)
        VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b
        VMLAL   qRow1b, dRow0a, dCCoeff                 ;// out3 += C * row4
        VMLAL   qRow0b, dRow2b, dDCoeff                 ;// out1 += D * row2(+1)
        VMLAL   qRow1a, dRow3b, dDCoeff                 ;// out2 += D * row3(+1)


        SUBS    iHeight, iHeight, #4                    ;// flags are consumed by BGT below
        VMLAL   qRow1b, dRow0b, dDCoeff                 ;// out3 += D * row4(+1)

        ;// Rounding, saturating shift right by 6 down to bytes, then store
        VQRSHRN dOutRow0, qOutRow0, #6
        VQRSHRN dOutRow1, qOutRow1, #6
        VQRSHRN dOutRow2, qOutRow2, #6
        VST1    dOutRow0U64, [pDst], iDstStep
        VQRSHRN dOutRow3, qOutRow3, #6

        VST1    dOutRow1U64, [pDst], iDstStep
        VST1    dOutRow2U64, [pDst], iDstStep
        VST1    dOutRow3U64, [pDst], iDstStep


        BGT     WidthIs8MVIsNotZero                     ;// repeat while rows remain
        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;// Width 4: two output rows per pass, stored as one 32-bit lane each
WidthIs4MVIsNotZero

        VLD1    dRow1a, [pSrc], Step1
        VMULL   qRow0a, dRow0a, dACoeff                 ;// out0  = A * row0
        VMULL   qRow0b, dRow1a, dACoeff                 ;// out1  = A * row1
        VLD1    dRow1b, [pSrc], SrcStepMinus1
        VMLAL   qRow0a, dRow0b, dBCoeff                 ;// out0 += B * row0(+1)
        VMLAL   qRow0b, dRow1b, dBCoeff                 ;// out1 += B * row1(+1)
        VLD1    dRow0a, [pSrc], Step1                   ;// row 2 = next pass row 0
        VMLAL   qRow0a, dRow1a, dCCoeff                 ;// out0 += C * row1
        VMLAL   qRow0b, dRow0a, dCCoeff                 ;// out1 += C * row2
        VLD1    dRow0b, [pSrc], SrcStepMinus1
        SUBS    iHeight, iHeight, #2
        VMLAL   qRow0b, dRow0b, dDCoeff                 ;// out1 += D * row2(+1)
        VMLAL   qRow0a, dRow1b, dDCoeff                 ;// out0 += D * row1(+1)

        VQRSHRN dOutRow1, qOutRow1, #6
        VQRSHRN dOutRow0, qOutRow0, #6

        VST1    dOutRow0U32[0], [pDst], iDstStep
        VST1    dOutRow1U32[0], [pDst], iDstStep

        BGT     WidthIs4MVIsNotZero
        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;// Width 2: same arithmetic as the width-4 loop, stored as one 16-bit lane each
WidthIs2MVIsNotZero

        VLD1    dRow1a, [pSrc], Step1
        VMULL   qRow0a, dRow0a, dACoeff                 ;// out0  = A * row0
        VMULL   qRow0b, dRow1a, dACoeff                 ;// out1  = A * row1
        VLD1    dRow1b, [pSrc], SrcStepMinus1
        VMLAL   qRow0a, dRow0b, dBCoeff                 ;// out0 += B * row0(+1)
        VMLAL   qRow0b, dRow1b, dBCoeff                 ;// out1 += B * row1(+1)
        VLD1    dRow0a, [pSrc], Step1                   ;// row 2 = next pass row 0
        VMLAL   qRow0a, dRow1a, dCCoeff                 ;// out0 += C * row1
        VMLAL   qRow0b, dRow0a, dCCoeff                 ;// out1 += C * row2
        VLD1    dRow0b, [pSrc], SrcStepMinus1
        SUBS    iHeight, iHeight, #2
        VMLAL   qRow0b, dRow0b, dDCoeff                 ;// out1 += D * row2(+1)
        VMLAL   qRow0a, dRow1b, dDCoeff                 ;// out0 += D * row1(+1)

        VQRSHRN dOutRow1, qOutRow1, #6
        VQRSHRN dOutRow0, qOutRow0, #6

        VST1    dOutRow0U16[0], [pDst], iDstStep
        VST1    dOutRow1U16[0], [pDst], iDstStep

        BGT     WidthIs2MVIsNotZero
        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;//-----------------------------------------------------------------------------------------------
;// Fractional mv is (0, 0): plain row copy
;//-----------------------------------------------------------------------------------------------

;// Width 8: undo the row advance made by the two loads before the dispatch,
;// then copy two rows per pass, reloading row 0 from memory.
WidthIs8MVIsZero
        SUB     pSrc, pSrc, iSrcStep

WidthIs8LoopMVIsZero
        VLD1    dRow0a, [pSrc], iSrcStep
        SUBS    iHeight, iHeight, #2
        VLD1    dRow0b, [pSrc], iSrcStep
        VST1    dOut0U64, [pDst], iDstStep
        VST1    dOut1U64, [pDst], iDstStep
        BGT     WidthIs8LoopMVIsZero

        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;// Width 4: dRow0a still holds row 0 from before the dispatch. Each pass loads
;// the second row of the pair into dRow0b, stores the first, then loads the
;// first row of the following pair into dRow0a before storing the second.
WidthIs4MVIsZero
        VLD1    dRow0b, [pSrc], iSrcStep

        SUBS    iHeight, iHeight, #2

        VST1    dOut00U32[0], [pDst], iDstStep
        VLD1    dRow0a, [pSrc], iSrcStep
        VST1    dOut01U32[0], [pDst], iDstStep

        BGT     WidthIs4MVIsZero
        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;// Width 2: same scheme as the width-4 copy with 16-bit stores
WidthIs2MVIsZero
        VLD1    dRow0b, [pSrc], iSrcStep
        SUBS    iHeight, iHeight, #2

        VST1    dOut0U16[0], [pDst], iDstStep
        VLD1    dRow0a, [pSrc], iSrcStep
        VST1    dOut1U16[0], [pDst], iDstStep

        BGT     WidthIs2MVIsZero
        MOV     return, #OMX_Sts_NoErr
        M_END

    ENDIF ;// CortexA8

        END

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm ends
;//-----------------------------------------------------------------------------------------------