;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

    IF ARM1136JS

;//-----------------------------------------------------------------------------------------------
;// Register aliases.
;// Many names map onto the same physical register; aliases that share a
;// register are never live at the same time.
;//-----------------------------------------------------------------------------------------------

;// Arguments: r0-r3 are used as they arrive, r4-r7 are loaded from the stack

pTop            RN 0            ;// source pointer, upper row of the current row pair
srcStride       RN 1            ;// source step in bytes (r1 is recycled as pMid)
pOut            RN 2            ;// destination pointer, upper output row
dstStride       RN 3            ;// destination step in bytes
wCount          RN 4            ;// width, later packed with the column counter
rowsLeft        RN 5            ;// height, counted down two rows at a time
fracX           RN 6            ;// dx
fracY           RN 7            ;// dy

;// Set-up and loop bookkeeping

scratch         RN 11
roundA          RN 12           ;// rounding constant 32 for the upper output row
roundB          RN 14           ;// rounding constant 32 for the lower output row
status          RN 0            ;// return value
invX            RN 8            ;// 8 - dx
invY            RN 9            ;// 8 - dy
xPair           RN 8            ;// (dx << 16) + (8 - dx)
coefBA          RN 6            ;// xPair * (8 - dy)
coefDC          RN 7            ;// xPair * dy

dstAdvance      RN 8            ;// 2 * dstStride - width
srcAdvance      RN 9            ;// 2 * srcPitch  - width
srcPitch        RN 10           ;// pMid - pTop

;// Row pointers

pMid            RN 1            ;// source row below pTop
pBot            RN 8            ;// source row below pMid
pOutNext        RN 8            ;// lower output row (interpolating path)
pOutLow         RN 12           ;// lower output row (copy path)

;// Copy path: four source bytes and the two packed halfwords

cpy00           RN 8
cpy01           RN 9
cpy10           RN 10
cpy11           RN 11

cpyRow0         RN 8
cpyRow1         RN 10

;// Interpolating path: the 3x3 source neighbourhood, tapRC = row R, column C

tap00           RN 8
tap01           RN 10
tap02           RN 12
tap10           RN 9
tap11           RN 11
tap12           RN 14
tap20           RN 10
tap21           RN 12
tap22           RN 14

;// Horizontal neighbours packed as (right << 16) | left.
;// "Lo" holds columns 0 and 1, "Hi" holds columns 1 and 2.

row0Lo          RN 8
row0Hi          RN 10
row1Lo          RN 9
row1Hi          RN 11
row2Lo          RN 10
row2Hi          RN 12

;// Interpolated results, resRC = output row R, column C

res00           RN 12
res01           RN 14
res10           RN 10
res11           RN 12

resRow0         RN 12           ;// res00 | res01 << 8
resRow1         RN 12           ;// res10 | res11 << 8

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm starts
;//-----------------------------------------------------------------------------------------------
;//
;// Arguments, in order: pSrc, iSrcStep, pDst, iDstStep (r0-r3), then
;// Width, Height, Dx, Dy (four bytes each, fetched through M_ARG / M_LDR).
;//
;// When dx + dy is non-zero every output byte is computed as
;//
;//     (A*p(x,y) + B*p(x+1,y) + C*p(x,y+1) + D*p(x+1,y+1) + 32) >> 6
;//
;// with  A = (8-dx)*(8-dy)   B = dx*(8-dy)   C = (8-dx)*dy   D = dx*dy.
;// No clamping is applied. For dx, dy in 0..7 the weights sum to 64, so the
;// result fits in a byte (input range is assumed - confirm against callers).
;//
;// When dx + dy is zero the source bytes are copied unchanged.
;//
;// Both paths produce a 2x2 tile of output per inner iteration and step the
;// column and row counters by two, so width and height are expected to be
;// even. Each output row of a tile is written with one halfword store.
;//
;// Returns OMX_Sts_NoErr in r0 on every path.
;//-----------------------------------------------------------------------------------------------

        ;// Function entry (macro from armCOMM_s.h)
        M_START armVCM4P10_Interpolate_Chroma, r11

        ;// Arguments that live on the stack
        M_ARG   Width, 4
        M_ARG   Height, 4
        M_ARG   Dx, 4
        M_ARG   Dy, 4

        ;// Fetch the stack arguments
        ;// M_STALL ARM1136JS=4

        M_LDR   wCount, Width
        M_LDR   rowsLeft, Height
        M_LDR   fracX, Dx
        M_LDR   fracY, Dy

        ;// invX = 8 - dx
        ;// invY = 8 - dy
        ;//
        ;// A = invX * invY
        ;// B = dx   * invY
        ;// C = invX * dy
        ;// D = dx   * dy

        ADD     pMid, pTop, srcStride           ;// second source row; r1 no longer holds the step
        SUB     scratch, wCount, #1
        RSB     invX, fracX, #8
        RSB     invY, fracY, #8
        CMN     fracX, fracY                    ;// Z <- (dx + dy == 0), tested by the BEQ below
        ADD     xPair, invX, fracX, LSL #16
        ORR     wCount, wCount, scratch, LSL #16 ;// high half = width - 1, low half = width

        ;// One multiply yields two packed 16-bit weights:
        ;// coefBA = (B << 16) | A,  coefDC = (D << 16) | C.
        ;// MUL leaves the flags from CMN untouched.

        MUL     coefBA, xPair, invY
        MUL     coefDC, xPair, fracY

        ;// Fractional motion vector (0, 0): plain copy

        BEQ     CopyEntry

        ;// Source neighbourhood for one 2x2 output tile:
        ;//
        ;//     tap00 tap01 tap02
        ;//     tap10 tap11 tap12
        ;//     tap20 tap21 tap22

        ;// Fractional motion vector is not (0, 0)

BlendRowPair

BlendPixelPair

        LDRB    tap00, [pTop, #0]
        LDRB    tap10, [pMid, #0]
        LDRB    tap01, [pTop, #1]
        LDRB    tap11, [pMid, #1]
        LDRB    tap02, [pTop, #2]!              ;// write-back: pTop += 2
        LDRB    tap12, [pMid, #2]!              ;// write-back: pMid += 2

        ORR     row0Lo, tap00, tap01, LSL #16
        ;// M_STALL ARM1136JS=1
        ORR     row0Hi, tap01, tap02, LSL #16
        MOV     roundA, #32
        ORR     row1Lo, tap10, tap11, LSL #16
        ORR     row1Hi, tap11, tap12, LSL #16

        ;// Upper-row contribution plus rounding: A*left + B*right + 32

        SMLAD   row0Lo, row0Lo, coefBA, roundA
        SMLAD   row0Hi, row0Hi, coefBA, roundA

        ;// wCount carries the column counter (initially width - 1) in its
        ;// top 16 bits and the untouched width in its bottom 16 bits.
        ;// Two columns are consumed per pass; the BGT at the bottom of the
        ;// loop uses these flags (nothing in between modifies N, Z or V).

        SUBS    wCount, wCount, #2<<16

        ;// Add the lower-row contribution: C*left + D*right

        SMLAD   res00, row1Lo, coefDC, row0Lo
        SMLAD   res01, row1Hi, coefDC, row0Hi

        ;// 2*pMid - pTop = pMid + (pMid - pTop): the row below pMid, at the
        ;// already advanced column position

        RSB     pBot, pTop, pMid, LSL #1

        MOV     res00, res00, LSR #6
        MOV     res01, res01, LSR #6

        LDRB    tap20, [pBot, #-2]

        ORR     resRow0, res00, res01, LSL #8
        STRH    resRow0, [pOut], #2             ;// two bytes of the upper output row

        LDRB    tap21, [pBot, #-1]
        LDRB    tap22, [pBot, #0]

        ;// pOut was just advanced by 2, hence the -2 on the store below

        ADD     pOutNext, pOut, dstStride

        ;// M_STALL ARM1136JS=1

        ORR     row2Lo, tap20, tap21, LSL #16
        ORR     row2Hi, tap21, tap22, LSL #16

        MOV     roundB, #32

        ;// The packed middle row is reused, now weighted with A and B

        SMLAD   row1Lo, row1Lo, coefBA, roundB
        SMLAD   row1Hi, row1Hi, coefBA, roundB
        SMLAD   res10, row2Lo, coefDC, row1Lo
        SMLAD   res11, row2Hi, coefDC, row1Hi

        MOV     res10, res10, LSR #6
        MOV     res11, res11, LSR #6

        ;// M_STALL ARM1136JS=1

        ORR     resRow1, res10, res11, LSL #8

        STRH    resRow1, [pOutNext, #-2]        ;// two bytes of the lower output row

        BGT     BlendPixelPair

        ;// Two rows finished. Every pointer moved forward by the width, so
        ;// stepping down two rows means adding 2 * step - width.

        SUBS    rowsLeft, rowsLeft, #2          ;// flags consumed by the BGT below
        ADD     wCount, wCount, #1<<16          ;// for an even width this clears the top half, leaving the plain width
        RSB     dstAdvance, wCount, dstStride, LSL #1
        SUB     srcPitch, pMid, pTop            ;// recover the source step
        SUB     scratch, wCount, #1
        RSB     srcAdvance, wCount, srcPitch, LSL #1
        ADD     pOut, pOut, dstAdvance
        ADD     pMid, pMid, srcAdvance
        ADD     pTop, pTop, srcAdvance
        ORR     wCount, wCount, scratch, LSL #16 ;// re-arm the column counter
        BGT     BlendRowPair
        MOV     status, #OMX_Sts_NoErr
        M_EXIT

        ;// Fractional motion vector is (0, 0)

CopyEntry
        ;// M_STALL ARM1136JS=4
CopyRowPair

CopyPixelPair

        LDRB    cpy00, [pTop], #1
        LDRB    cpy01, [pTop], #1
        LDRB    cpy10, [pMid], #1
        LDRB    cpy11, [pMid], #1

        ADD     pOutLow, pOut, dstStride
        SUBS    wCount, wCount, #2<<16          ;// same packed counter as the interpolating path

        ORR     cpyRow0, cpy00, cpy01, LSL #8
        ORR     cpyRow1, cpy10, cpy11, LSL #8

        STRH    cpyRow0, [pOut], #2
        STRH    cpyRow1, [pOutLow], #2

        BGT     CopyPixelPair

        ;// Same two-row advance as in the interpolating path

        SUBS    rowsLeft, rowsLeft, #2
        ADD     wCount, wCount, #1<<16
        RSB     dstAdvance, wCount, dstStride, LSL #1
        SUB     srcPitch, pMid, pTop
        SUB     scratch, wCount, #1
        RSB     srcAdvance, wCount, srcPitch, LSL #1
        ADD     pOut, pOut, dstAdvance
        ADD     pMid, pMid, srcAdvance
        ADD     pTop, pTop, srcAdvance
        ORR     wCount, wCount, scratch, LSL #16
        BGT     CopyRowPair
        MOV     status, #OMX_Sts_NoErr
        M_END

    ENDIF ;// ARM1136JS


        END

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm ends
;//-----------------------------------------------------------------------------------------------
