;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

;//--------------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
;//
;// Purpose:
;//   Produces four rows of four filtered pixels from a window of nine
;//   source rows.  A 6-tap filter with taps (1, -5, 20, 20, -5, 1) is
;//   applied down the columns first (8-bit -> 16-bit intermediates), then
;//   across each intermediate row (16-bit -> 32-bit accumulators).  Each
;//   pass has a gain of 32, so the final rounding shift by 10 removes the
;//   combined gain of 1024 before saturating to 8 bits.
;//
;// Inputs:
;//   pSrc    (r0)  address of the first source row (row A); 16 bytes are
;//                 loaded from each of the nine rows A..I
;//   srcStep (r1)  byte increment applied between successive row loads
;//   pDst    (r2)  declared below but not referenced by any instruction here
;//   dstStep (r3)  declared below but not referenced by any instruction here
;//
;// Outputs:
;//   dAcc0..dAcc3 (d0, d2, d4, d6): one output row each.  The low four
;//   byte lanes hold the four filtered pixels.  This routine contains no
;//   store instruction; results are left in registers.
;//
;// Registers written:
;//   r0 (advanced by 5*srcStep), r12, d0-d31.
;//
;// NOTE(review): d8-d15 are overwritten and M_START names only r11.  The
;//   M_START/M_END macros come from armCOMM_s.h, which is not visible
;//   here, so whether they preserve d8-d15 cannot be confirmed from this
;//   file.  The "_unsafe" suffix presumably means the caller is
;//   responsible -- confirm against the callers.
;//
;// The instruction order is significant: several aliases below name the
;// same physical register, and each is only written after the last read
;// of the value previously held there.  Do not reorder without rechecking.
;//--------------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe

        M_VARIANTS CortexA8

    IF CortexA8
        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare Neon registers

;// Filter constants.  d30/d31 are used as U8 vectors during the vertical
;// pass and re-initialised as S16 vectors for the horizontal pass.
dTCoeff5        DN 30.U8
dTCoeff20       DN 31.U8
dCoeff5         DN 30.S16
dCoeff20        DN 31.S16

;// Nine source rows A..I, 16 bytes each, in Q0..Q8
qSrcA01         QN 0.U8
qSrcB23         QN 1.U8
qSrcC45         QN 2.U8
qSrcD67         QN 3.U8
qSrcE89         QN 4.U8
qSrcF1011       QN 5.U8
qSrcG1213       QN 6.U8
qSrcH1415       QN 7.U8
qSrcI1617       QN 8.U8

;// Even D register of each source row (first 8 bytes loaded)
dSrcA0          DN 0.U8
dSrcB2          DN 2.U8
dSrcC4          DN 4.U8
dSrcD6          DN 6.U8
dSrcE8          DN 8.U8
dSrcF10         DN 10.U8
dSrcG12         DN 12.U8
dSrcH14         DN 14.U8
dSrcI16         DN 16.U8

;// Odd D register of each source row (second 8 bytes loaded)
dSrcA1          DN 1.U8
dSrcB3          DN 3.U8
dSrcC5          DN 5.U8
dSrcD7          DN 7.U8
dSrcE9          DN 9.U8
dSrcF11         DN 11.U8
dSrcG13         DN 13.U8
dSrcH15         DN 15.U8
dSrcI17         DN 17.U8

;// Vertical-pass results for output rows P, Q, R, S: columns 0..7
qTempP01        QN 9.S16
qTempQ01        QN 10.S16
qTempR01        QN 11.S16
qTempS01        QN 12.S16

;// Vertical-pass results for output rows P, Q, R, S: columns 8..15.
;// These reuse Q0..Q3, i.e. the registers holding source rows A..D.
qTempP23        QN 0.S16
qTempQ23        QN 1.S16
qTempR23        QN 2.S16
qTempS23        QN 3.S16

;// D-register views of the vertical results:
;//   x0 = columns 0..3, x1 = columns 4..7, x2 = columns 8..11
dTempP0         DN 18.S16
dTempP1         DN 19.S16
dTempP2         DN 0.S16

dTempQ0         DN 20.S16
dTempQ1         DN 21.S16
dTempQ2         DN 2.S16

dTempR0         DN 22.S16
dTempR1         DN 23.S16
dTempR2         DN 4.S16

dTempS0         DN 24.S16
dTempS1         DN 25.S16
dTempS2         DN 6.S16

;// Horizontal-pass scratch: shifted copies of the current intermediate row
dTempB0         DN 26.S16
dTempC0         DN 27.S16
dTempD0         DN 28.S16
dTempF0         DN 29.S16

;// Narrowed (U16) horizontal results; each is the low half of qAcc0..qAcc3
dTempAcc0       DN 0.U16
dTempAcc1       DN 2.U16
dTempAcc2       DN 4.U16
dTempAcc3       DN 6.U16

;// Final U8 results
dAcc0           DN 0.U8
dAcc1           DN 2.U8
dAcc2           DN 4.U8
dAcc3           DN 6.U8

;// 32-bit horizontal accumulators, one per output row (Q0..Q3 again)
qAcc0           QN 0.S32
qAcc1           QN 1.S32
qAcc2           QN 2.S32
qAcc3           QN 3.S32

;// U16 view of Q0..Q3, used as the source of the final narrowing
qTAcc0          QN 0.U16
qTAcc1          QN 1.U16
qTAcc2          QN 2.U16
qTAcc3          QN 3.U16

;// Extra scratch; dTmp shares d8 with dSrcE8 and is only written after
;// the last read of dSrcE8.  qTmp is not referenced by any instruction.
qTmp            QN 4.S16
dTmp            DN 8.S16

        ;//------------------------------------------------------------------
        ;// Load rows A..I and run the vertical pass.
        ;// pSrc walks rows A..E; r12 walks rows F..I.  Loads are interleaved
        ;// with arithmetic, and the four output rows are accumulated in an
        ;// interleaved order.  For each output row the accumulated value is
        ;//   top + bottom + 20*(mid0 + mid1) - 5*(inner0 + inner1)
        ;// over six consecutive source rows:
        ;//   P: A..F    Q: B..G    R: C..H    S: D..I
        ;//------------------------------------------------------------------
        VLD1        qSrcA01, [pSrc], srcStep    ;// [a0 a1 a2 a3 .. a15]
        ADD         r12, pSrc, srcStep, LSL #2  ;// pSrc already points at row B, so r12 -> row F
        VMOV        dTCoeff5, #5
        VMOV        dTCoeff20, #20
        VLD1        qSrcF1011, [r12], srcStep
        VLD1        qSrcB23, [pSrc], srcStep    ;// [b0 b1 b2 b3 .. b15]

        VLD1        qSrcG1213, [r12], srcStep
        VADDL       qTempP01, dSrcA0, dSrcF10   ;// P = A + F
        VLD1        qSrcC45, [pSrc], srcStep    ;// [c0 c1 c2 c3 .. c15]
        VADDL       qTempP23, dSrcA1, dSrcF11   ;// overwrites Q0 (row A); dSrcA0 was consumed above
        VLD1        qSrcD67, [pSrc], srcStep
        VADDL       qTempQ01, dSrcB2, dSrcG12   ;// Q = B + G
        VLD1        qSrcE89, [pSrc], srcStep

        ;//t0
        VMLAL       qTempP01, dSrcC4, dTCoeff20

        VLD1        qSrcH1415, [r12], srcStep

        VMLAL       qTempP23, dSrcC5, dTCoeff20

        VLD1        qSrcI1617, [r12], srcStep   ;// [i0 i1 i2 i3 .. ]

        VMLAL       qTempP01, dSrcD6, dTCoeff20
        VMLAL       qTempQ01, dSrcD6, dTCoeff20
        VMLSL       qTempP23, dSrcB3, dTCoeff5

        VADDL       qTempR01, dSrcC4, dSrcH14   ;// R = C + H

        VMLSL       qTempP01, dSrcB2, dTCoeff5

        VADDL       qTempQ23, dSrcB3, dSrcG13   ;// overwrites Q1 (row B); all reads of row B are done

        VMLAL       qTempP23, dSrcD7, dTCoeff20
        VMLAL       qTempQ01, dSrcE8, dTCoeff20

        VMLSL       qTempP01, dSrcE8, dTCoeff5  ;// P, columns 0..7 complete
        VMLAL       qTempQ23, dSrcD7, dTCoeff20

        VMLSL       qTempP23, dSrcE9, dTCoeff5  ;// P, columns 8..15 complete

        ;//t1

        VMLAL       qTempR01, dSrcE8, dTCoeff20
        VMLSL       qTempQ01, dSrcC4, dTCoeff5
        VMLSL       qTempQ23, dSrcC5, dTCoeff5
        VADDL       qTempR23, dSrcC5, dSrcH15   ;// overwrites Q2 (row C); all reads of row C are done

        VMLAL       qTempR01, dSrcF10, dTCoeff20
        VMLSL       qTempQ01, dSrcF10, dTCoeff5 ;// Q, columns 0..7 complete
        VMLAL       qTempQ23, dSrcE9, dTCoeff20
        VMLAL       qTempR23, dSrcE9, dTCoeff20
        VADDL       qTempS01, dSrcD6, dSrcI16   ;// S = D + I


        VMLSL       qTempR01, dSrcD6, dTCoeff5
        VMLSL       qTempQ23, dSrcF11, dTCoeff5 ;// Q, columns 8..15 complete
        VMLSL       qTempR23, dSrcD7, dTCoeff5

        ;//t2
        VADDL       qTempS23, dSrcD7, dSrcI17   ;// overwrites Q3 (row D); all reads of row D are done
        VMLAL       qTempS01, dSrcF10, dTCoeff20
        VMLSL       qTempR01, dSrcG12, dTCoeff5 ;// R, columns 0..7 complete
        VMLSL       qTempR23, dSrcG13, dTCoeff5

        VMLAL       qTempS23, dSrcF11, dTCoeff20
        VMLAL       qTempS01, dSrcG12, dTCoeff20
        VEXT        dTempB0, dTempP0, dTempP1, #1   ;// horizontal pass on row P starts: p[1..4]
        VMLAL       qTempR23, dSrcF11, dTCoeff20    ;// R, columns 8..15 complete


        ;//t3
        VMLAL       qTempS23, dSrcG13, dTCoeff20    ;// last use of d31 as a U8 constant
        VMLSL       qTempS01, dSrcE8, dTCoeff5      ;// last read of dSrcE8 (d8)
        VEXT        dTempC0, dTempP0, dTempP1, #2   ;// p[2..5]
        VMOV        dCoeff20, #20                   ;// d31 reloaded as an S16 vector of 20
        VMLSL       qTempS23, dSrcE9, dTCoeff5
        VMLSL       qTempS01, dSrcH14, dTCoeff5     ;// S, columns 0..7 complete
        VEXT        dTempF0, dTempP1, dTempP2, #1   ;// p[5..8]
        VEXT        dTempD0, dTempP0, dTempP1, #3   ;// p[3..6]
        VMLSL       qTempS23, dSrcH15, dTCoeff5     ;// S, columns 8..15 complete; last use of d30 as U8

        ;//------------------------------------------------------------------
        ;// Horizontal pass.  For each intermediate row x, and output column
        ;// n = 0..3, the 32-bit accumulator receives
        ;//   x[n] + x[n+5] + 20*(x[n+2] + x[n+3]) - 5*(x[n+1] + x[n+4])
        ;// The shifted vectors are built with VEXT; work for the next row is
        ;// interleaved with the multiply-accumulates of the current row.
        ;//------------------------------------------------------------------
        VADDL       qAcc0, dTempP0, dTempF0         ;// p[n] + p[n+5]; overwrites Q0 after dTempP2 was read
        VADD        dTempC0, dTempC0, dTempD0       ;// p[n+2] + p[n+3]
        ;//h
        VMOV        dCoeff5, #5                     ;// d30 reloaded as an S16 vector of 5

        ;// res0
        VADD        dTempB0, dTempB0, dTempP1       ;// p[n+1] + p[n+4]
        VMLAL       qAcc0, dTempC0, dCoeff20
        VEXT        dTempC0, dTempQ0, dTempQ1, #2   ;// row Q: q[2..5]
        VEXT        dTempD0, dTempQ0, dTempQ1, #3   ;// row Q: q[3..6]
        VEXT        dTempF0, dTempQ1, dTempQ2, #1   ;// row Q: q[5..8]
        VMLSL       qAcc0, dTempB0, dCoeff5         ;// row P accumulator complete

        ;// res1
        VEXT        dTempB0, dTempQ0, dTempQ1, #1   ;// q[1..4]
        VADDL       qAcc1, dTempQ0, dTempF0         ;// overwrites Q1 after dTempQ2 was read
        VADD        dTempC0, dTempC0, dTempD0
        VADD        dTempB0, dTempB0, dTempQ1
        VEXT        dTempD0, dTempR0, dTempR1, #3   ;// row R: r[3..6]
        VMLAL       qAcc1, dTempC0, dCoeff20
        VEXT        dTempF0, dTempR1, dTempR2, #1   ;// row R: r[5..8]
        VEXT        dTempC0, dTempR0, dTempR1, #2   ;// row R: r[2..5]
        VEXT        dTmp, dTempR0, dTempR1, #1      ;// row R: r[1..4], held in dTmp because dTempB0
                                                    ;// is still read by the VMLSL two lines below
        VADDL       qAcc2, dTempR0, dTempF0         ;// overwrites Q2 after dTempR2 was read
        VMLSL       qAcc1, dTempB0, dCoeff5         ;// row Q accumulator complete
;        VEXT        dTempB0, dTempR0, dTempR1, #1   (disabled; the dTmp VEXT above is used instead)
        VADD        dTempC0, dTempC0, dTempD0

        ;// res2
        VADD        dTempB0, dTmp, dTempR1          ;// r[n+1] + r[n+4]
        VEXT        dTempD0, dTempS0, dTempS1, #3   ;// row S: s[3..6]
        VMLAL       qAcc2, dTempC0, dCoeff20
;        VADD        dTempB0, dTempB0, dTempR1       (disabled; replaced by the VADD from dTmp above)

        ;// res3
        VEXT        dTempC0, dTempS0, dTempS1, #2   ;// s[2..5]
        VEXT        dTempF0, dTempS1, dTempS2, #1   ;// s[5..8]
        VADD        dTempC0, dTempC0, dTempD0
        VEXT        dTmp, dTempS0, dTempS1, #1      ;// s[1..4]
        VADDL       qAcc3, dTempS0, dTempF0         ;// overwrites Q3 after dTempS2 was read
        VMLSL       qAcc2, dTempB0, dCoeff5         ;// row R accumulator complete
        VMLAL       qAcc3, dTempC0, dCoeff20
        VADD        dTmp, dTmp, dTempS1             ;// s[n+1] + s[n+4]
        VMLSL       qAcc3, dTmp, dCoeff5            ;// row S accumulator complete

        ;// Rounding right shift by 10 with unsigned saturation: S32 -> U16.
        ;// Each result lands in the even D register of its own accumulator.
        VQRSHRUN    dTempAcc0, qAcc0, #10
        VQRSHRUN    dTempAcc1, qAcc1, #10
        VQRSHRUN    dTempAcc2, qAcc2, #10
        VQRSHRUN    dTempAcc3, qAcc3, #10

        ;// Saturating narrow U16 -> U8.  The source is the whole Q register,
        ;// so only the low four byte lanes of each result derive from the
        ;// VQRSHRUN output; the upper four lanes come from the odd D register,
        ;// which still holds the upper half of the 32-bit accumulator.
        VQMOVN      dAcc0, qTAcc0
        VQMOVN      dAcc1, qTAcc1
        VQMOVN      dAcc2, qTAcc2
        VQMOVN      dAcc3, qTAcc3

        M_END

    ENDIF





    END
