;//
;//
;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;//-----------------------------------------------------------------------------------------------
;// Overview
;//
;// armVCM4P10_Interpolate_Chroma writes an iWidth x iHeight block of 8-bit
;// samples to pDst, computed from the source block at pSrc.
;//
;// For a fractional offset (dx, dy) != (0, 0) each output sample is
;//
;//     out = (A*x00 + B*x01 + C*x10 + D*x11 + 32) >> 6
;//
;// where x00/x01 are horizontally adjacent source samples, x10/x11 the two
;// samples one source row below, and
;//
;//     A = (8 - dx) * (8 - dy)      B = dx * (8 - dy)
;//     C = (8 - dx) * dy            D = dx * dy
;//
;// For (dx, dy) == (0, 0) the source samples are copied unchanged.
;//
;// Both paths produce a 2x2 tile of output samples per inner-loop iteration
;// (two halfword stores, one per output row), and step the width and height
;// counters by 2.
;// NOTE(review): this presumably requires iWidth and iHeight to be even and
;// non-zero, and dx, dy to lie in [0, 7] so that the packed 16-bit coefficient
;// fields and the 8-bit results cannot overflow -- confirm against callers.
;// NOTE(review): results are stored with STRH as (left | right << 8); this
;// presumably assumes little-endian data layout and a destination for which
;// halfword stores are permitted -- confirm.
;//
;// In:    r0 = pSrc, r1 = iSrcStep, r2 = pDst, r3 = iDstStep
;//        stack: Width, Height, Dx, Dy (4 bytes each, declared with M_ARG)
;// Out:   r0 = OMX_Sts_NoErr
;//-----------------------------------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        IF ARM1136JS

;// input registers
;// (pSrc..iDstStep arrive in r0-r3; iWidth..dy are loaded from the stack below)

pSrc                 RN 0
iSrcStep             RN 1           ;// only valid on entry: r1 is reused as pSrc1
pDst                 RN 2
iDstStep             RN 3
iWidth               RN 4           ;// later holds width | (loop counter << 16)
iHeight              RN 5
dx                   RN 6           ;// r6 is later overwritten by BACoeff
dy                   RN 7           ;// r7 is later overwritten by DCCoeff


;// local variable registers
;//
;// Many of the aliases below name the same physical register. They are used
;// at different points of the routine, so the instruction order in the code
;// is what keeps their lifetimes from overlapping.

temp                 RN 11
r0x20                RN 12          ;// rounding constant 32, first output row
tmp0x20              RN 14          ;// rounding constant 32, second output row
return               RN 0
dxPlusdy             RN 10          ;// declared but not referenced in this file
EightMinusdx         RN 8
EightMinusdy         RN 9
dxEightMinusdx       RN 8           ;// (8 - dx) in bits 15:0, dx in bits 31:16
BACoeff              RN 6           ;// A in bits 15:0, B in bits 31:16
DCCoeff              RN 7           ;// C in bits 15:0, D in bits 31:16

iDstStepx2MinusWidth RN 8           ;// 2 * iDstStep - width
iSrcStepx2MinusWidth RN 9           ;// 2 * source step - width
iSrcStep1            RN 10          ;// source step recomputed as pSrc1 - pSrc

pSrc1                RN 1           ;// source row below pSrc
pSrc2                RN 8           ;// source row two below pSrc
pDst1                RN 8           ;// second output row (non-zero mv path)
pDst2                RN 12          ;// second output row (zero mv path)

pix00                RN 8
pix01                RN 9
pix10                RN 10
pix11                RN 11

Out0100              RN 8           ;// pix00 | pix01 << 8
Out1110              RN 10          ;// pix10 | pix11 << 8

x00                  RN 8
x01                  RN 10
x02                  RN 12
x10                  RN 9
x11                  RN 11
x12                  RN 14
x20                  RN 10
x21                  RN 12
x22                  RN 14

;// Pairs of samples packed as 16-bit fields: left sample in bits 15:0,
;// right sample in bits 31:16 (operand format for SMLAD).

x01x00               RN 8
x02x01               RN 10
x11x10               RN 9
x12x11               RN 11
x21x20               RN 10
x22x21               RN 12

OutRow00             RN 12
OutRow01             RN 14
OutRow10             RN 10
OutRow11             RN 12

OutRow0100           RN 12          ;// OutRow00 | OutRow01 << 8
OutRow1110           RN 12          ;// OutRow10 | OutRow11 << 8

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm starts
;//-----------------------------------------------------------------------------------------------

        ;// Write function header
        ;// NOTE(review): M_START is a macro from armCOMM_s.h; the "r11" operand
        ;// presumably names the highest callee-saved register this routine
        ;// uses -- confirm against the macro definition.
        M_START armVCM4P10_Interpolate_Chroma, r11

        ;// Define stack arguments
        M_ARG   Width,      4
        M_ARG   Height,     4
        M_ARG   Dx,         4
        M_ARG   Dy,         4

        ;// Load argument from the stack
        ;// M_STALL ARM1136JS=4

        M_LDR   iWidth,  Width
        M_LDR   iHeight, Height
        M_LDR   dx,      Dx
        M_LDR   dy,      Dy

        ;// EightMinusdx = 8 - dx
        ;// EightMinusdy = 8 - dy

        ;// ACoeff = EightMinusdx * EightMinusdy
        ;// BCoeff = dx * EightMinusdy
        ;// CCoeff = EightMinusdx * dy
        ;// DCoeff = dx * dy

        ADD     pSrc1, pSrc, iSrcStep               ;// pSrc1 = next source row; overwrites iSrcStep (r1)
        SUB     temp, iWidth, #1
        RSB     EightMinusdx, dx, #8
        RSB     EightMinusdy, dy, #8
        CMN     dx,dy                               ;// Z set when dx + dy == 0; consumed by BEQ below
        ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16
        ORR     iWidth, iWidth, temp, LSL #16       ;// iWidth = width | (width - 1) << 16

        ;// Packed Coeffs.
        ;// One multiply of the packed (dx : 8 - dx) word yields two coefficients:
        ;//   BACoeff = A | B << 16,  DCCoeff = C | D << 16
        ;// These overwrite dx (r6) and dy (r7); dy is still read as the operand
        ;// of the second MUL. Neither MUL sets flags, so the CMN result survives.

        MUL     BACoeff, dxEightMinusdx, EightMinusdy
        MUL     DCCoeff, dxEightMinusdx, dy


        ;// Checking either of dx and dy being non-zero

        BEQ     MVIsZero

        ;// Pixel layout:
        ;//
        ;//   x00 x01 x02
        ;//   x10 x11 x12
        ;//   x20 x21 x22
        ;//
        ;// Each inner iteration reads this 3x3 neighbourhood and writes the 2x2
        ;// output tile whose top-left sample is co-located with x00.

        ;// If fractional mv is not (0, 0)

OuterLoopMVIsNotZero

InnerLoopMVIsNotZero

        LDRB    x00, [pSrc,  #+0]
        LDRB    x10, [pSrc1, #+0]
        LDRB    x01, [pSrc,  #+1]
        LDRB    x11, [pSrc1, #+1]
        LDRB    x02, [pSrc,  #+2]!                  ;// writeback: pSrc  += 2
        LDRB    x12, [pSrc1, #+2]!                  ;// writeback: pSrc1 += 2

        ORR     x01x00, x00, x01, LSL #16
        ;// M_STALL ARM1136JS=1
        ORR     x02x01, x01, x02, LSL #16
        MOV     r0x20,  #32                         ;// r12 is free again once x02 has been packed
        ORR     x11x10, x10, x11, LSL #16
        ORR     x12x11, x11, x12, LSL #16

        ;// Top-row contribution plus rounding: A*left + B*right + 32
        SMLAD   x01x00, x01x00, BACoeff, r0x20
        SMLAD   x02x01, x02x01, BACoeff, r0x20

        ;// iWidth packed with MSB (top 16 bits)
        ;// as inner loop counter value i.e
        ;// (iWidth -1) and LSB (lower 16 bits)
        ;// as original width
        ;//
        ;// The counter field drops by 2 per iteration. The flags set here are
        ;// consumed by the BGT at the bottom of the loop; no instruction in
        ;// between updates the condition flags.

        SUBS    iWidth, iWidth, #1<<17

        ;// Add bottom-row contribution: + C*left + D*right
        SMLAD   OutRow00, x11x10, DCCoeff, x01x00
        SMLAD   OutRow01, x12x11, DCCoeff, x02x01

        ;// pSrc2 = 2 * pSrc1 - pSrc = pSrc + (pSrc1 - pSrc), i.e. one source row
        ;// below pSrc1. pSrc and pSrc1 have already advanced by 2, hence the
        ;// -2 / -1 / 0 offsets used for x20 / x21 / x22.
        RSB     pSrc2, pSrc, pSrc1, LSL #1

        MOV     OutRow00, OutRow00, LSR #6
        MOV     OutRow01, OutRow01, LSR #6

        LDRB    x20,[pSrc2, #-2]

        ORR     OutRow0100, OutRow00, OutRow01, LSL #8
        STRH    OutRow0100, [pDst], #2              ;// two samples of output row 0; pDst += 2

        LDRB    x21,[pSrc2, #-1]
        LDRB    x22,[pSrc2, #+0]

        ;// pDst was already post-incremented, so the second-row store below
        ;// uses offset -2 to land in the same columns.
        ADD     pDst1, pDst, iDstStep

        ;// M_STALL ARM1136JS=1

        ORR     x21x20, x20, x21, LSL #16
        ORR     x22x21, x21, x22, LSL #16

        MOV     tmp0x20, #32

        ;// Reusing the packed data x11x10 and x12x11
        ;// (row 1 of the source is the top row for output row 1)

        SMLAD   x11x10,  x11x10,  BACoeff, tmp0x20
        SMLAD   x12x11,  x12x11,  BACoeff, tmp0x20
        SMLAD   OutRow10, x21x20, DCCoeff, x11x10
        SMLAD   OutRow11, x22x21, DCCoeff, x12x11

        MOV     OutRow10, OutRow10, LSR #6
        MOV     OutRow11, OutRow11, LSR #6

        ;// M_STALL ARM1136JS=1

        ORR     OutRow1110, OutRow10, OutRow11, LSL #8

        STRH    OutRow1110, [pDst1, #-2]            ;// two samples of output row 1

        BGT     InnerLoopMVIsNotZero                ;// flags from SUBS iWidth above

        ;// End of a row pair: step to the next two rows and re-arm the counter.
        ;// The flags set by SUBS iHeight are consumed by the BGT below; the
        ;// instructions in between do not update flags.
        SUBS    iHeight, iHeight, #2
        ADD     iWidth, iWidth, #1<<16              ;// undo the counter field, leaving the plain width
        RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
        SUB     iSrcStep1, pSrc1, pSrc              ;// recover source step (r1 no longer holds it)
        SUB     temp, iWidth, #1
        RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
        ADD     pDst, pDst, iDstStepx2MinusWidth
        ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
        ADD     pSrc, pSrc, iSrcStepx2MinusWidth
        ORR     iWidth, iWidth, temp, LSL #16       ;// iWidth = width | (width - 1) << 16
        BGT     OuterLoopMVIsNotZero
        MOV     return,  #OMX_Sts_NoErr
        ;// NOTE(review): M_EXIT is a macro from armCOMM_s.h; presumably it
        ;// returns from the function here -- confirm.
        M_EXIT

        ;// If fractional mv is (0, 0)
        ;// Plain copy of a 2x2 tile per inner iteration; the loop and row-advance
        ;// bookkeeping mirrors the non-zero path above.

MVIsZero
        ;// M_STALL ARM1136JS=4
OuterLoopMVIsZero

InnerLoopMVIsZero

        LDRB    pix00, [pSrc],  #+1
        LDRB    pix01, [pSrc],  #+1
        LDRB    pix10, [pSrc1], #+1
        LDRB    pix11, [pSrc1], #+1

        ADD     pDst2, pDst, iDstStep               ;// second output row, same columns as pDst
        SUBS    iWidth, iWidth, #1<<17              ;// counter field -= 2; flags consumed by BGT below

        ORR     Out0100, pix00, pix01, LSL #8
        ORR     Out1110, pix10, pix11, LSL #8

        STRH    Out0100, [pDst],  #2
        STRH    Out1110, [pDst2], #2                ;// pDst2 is recomputed at the top of each iteration

        BGT     InnerLoopMVIsZero

        SUBS    iHeight, iHeight, #2
        ADD     iWidth, iWidth, #1<<16              ;// undo the counter field, leaving the plain width
        RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
        SUB     iSrcStep1, pSrc1, pSrc              ;// recover source step (r1 no longer holds it)
        SUB     temp, iWidth, #1
        RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
        ADD     pDst, pDst, iDstStepx2MinusWidth
        ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
        ADD     pSrc, pSrc, iSrcStepx2MinusWidth
        ORR     iWidth, iWidth, temp, LSL #16       ;// iWidth = width | (width - 1) << 16
        BGT     OuterLoopMVIsZero
        MOV     return,  #OMX_Sts_NoErr
        ;// NOTE(review): M_END is a macro from armCOMM_s.h; presumably it emits
        ;// the final return and closes the function -- confirm.
        M_END

        ENDIF ;// ARM1136JS


        END

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm ends
;//-----------------------------------------------------------------------------------------------
