;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// ---------------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
;//
;// Purpose:
;//   Applies the 6-tap filter (1, -5, 20, 20, -5, 1) first vertically and
;//   then horizontally to produce a 4x4 block of results.  The vertical pass
;//   is kept at 16-bit precision; the horizontal pass accumulates in 32 bits
;//   and the final value is rounded with a right shift by 10 and saturated
;//   to 8 bits.
;//
;// Inputs:
;//   r0  pSrc     Address of the first of nine source rows; 16 bytes are
;//                loaded from each row.
;//   r1  srcStep  Byte distance between consecutive source rows.
;//   r2  pDst     Declared but not referenced by this routine.
;//   r3  dstStep  Declared but not referenced by this routine.
;//
;// Outputs (left in NEON registers, nothing is stored to memory here):
;//   dAcc0..dAcc3 (D0, D2, D4, D6): the low four bytes of each hold one
;//   output row.  The upper four bytes are narrowed from D1/D3/D5/D7, which
;//   still contain the upper half of the 32-bit accumulators, so they do not
;//   carry results.
;//
;// Registers written:
;//   r0  (advanced by 5 * srcStep), r12 (ends 9 rows past the incoming pSrc),
;//   Q0-Q14 (D0-D29) and D30/D31 (filter coefficients).
;//
;// NOTE(review): Q4-Q7 (D8-D15) are written below, while M_START is invoked
;//   with r11 only.  What M_START preserves is defined in armCOMM_s.h, which
;//   is not visible here; presumably callers of this routine are expected to
;//   preserve D8-D15 themselves - confirm before calling from new code.
;//
;// The instruction stream is hand-interleaved and several aliases share a
;// physical register (see the declarations).  Do not reorder instructions
;// without re-checking every alias listed below.
;// ---------------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe

        M_VARIANTS CortexA8

    IF CortexA8
        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2                        ;// not referenced in this routine
dstStep         RN 3                        ;// not referenced in this routine

;// Declare Neon registers

;// Filter coefficients.  D30/D31 are viewed as U8 lanes for the vertical
;// pass and re-initialised as S16 lanes for the horizontal pass.
dTCoeff5        DN 30.U8
dTCoeff20       DN 31.U8
dCoeff5         DN 30.S16
dCoeff20        DN 31.S16

;// Nine source rows A..I, 16 bytes each, in Q0..Q8
qSrcA01         QN 0.U8
qSrcB23         QN 1.U8
qSrcC45         QN 2.U8
qSrcD67         QN 3.U8
qSrcE89         QN 4.U8
qSrcF1011       QN 5.U8
qSrcG1213       QN 6.U8
qSrcH1415       QN 7.U8
qSrcI1617       QN 8.U8

;// Low 8 bytes of each source row (columns 0..7)
dSrcA0          DN 0.U8
dSrcB2          DN 2.U8
dSrcC4          DN 4.U8
dSrcD6          DN 6.U8
dSrcE8          DN 8.U8
dSrcF10         DN 10.U8
dSrcG12         DN 12.U8
dSrcH14         DN 14.U8
dSrcI16         DN 16.U8

;// High 8 bytes of each source row (columns 8..15)
dSrcA1          DN 1.U8
dSrcB3          DN 3.U8
dSrcC5          DN 5.U8
dSrcD7          DN 7.U8
dSrcE9          DN 9.U8
dSrcF11         DN 11.U8
dSrcG13         DN 13.U8
dSrcH15         DN 15.U8
dSrcI17         DN 17.U8

;// Vertically filtered rows P, Q, R, S: columns 0..7 as eight S16 lanes
qTempP01        QN 9.S16
qTempQ01        QN 10.S16
qTempR01        QN 11.S16
qTempS01        QN 12.S16

;// Vertically filtered rows P, Q, R, S: columns 8..15.
;// These reuse Q0..Q3, i.e. they overwrite source rows A..D.
qTempP23        QN 0.S16
qTempQ23        QN 1.S16
qTempR23        QN 2.S16
qTempS23        QN 3.S16

;// D-register views of the filtered rows:
;//   xxx0 = columns 0..3, xxx1 = columns 4..7, xxx2 = columns 8..11
dTempP0         DN 18.S16
dTempP1         DN 19.S16
dTempP2         DN 0.S16

dTempQ0         DN 20.S16
dTempQ1         DN 21.S16
dTempQ2         DN 2.S16

dTempR0         DN 22.S16
dTempR1         DN 23.S16
dTempR2         DN 4.S16

dTempS0         DN 24.S16
dTempS1         DN 25.S16
dTempS2         DN 6.S16

;// Scratch vectors holding lane-shifted copies for the horizontal pass
dTempB0         DN 26.S16
dTempC0         DN 27.S16
dTempD0         DN 28.S16
dTempF0         DN 29.S16

;// Results after the rounding shift (four U16 lanes each)
dTempAcc0       DN 0.U16
dTempAcc1       DN 2.U16
dTempAcc2       DN 4.U16
dTempAcc3       DN 6.U16

;// Final 8-bit results
dAcc0           DN 0.U8
dAcc1           DN 2.U8
dAcc2           DN 4.U8
dAcc3           DN 6.U8

;// 32-bit horizontal accumulators.  These reuse Q0..Q3 again, so each one
;// overwrites the xxx2 half-row (D0/D2/D4/D6) of the matching filtered row.
qAcc0           QN 0.S32
qAcc1           QN 1.S32
qAcc2           QN 2.S32
qAcc3           QN 3.S32

;// U16 views of Q0..Q3 used as the source of the final narrowing
qTAcc0          QN 0.U16
qTAcc1          QN 1.U16
qTAcc2          QN 2.U16
qTAcc3          QN 3.U16

;// Extra scratch.  dTmp is D8, the low half of qSrcE89; it is only written
;// after the last vertical tap that reads row E.  qTmp is not referenced.
qTmp            QN 4.S16
dTmp            DN 8.S16

        ;// ------------------------------------------------------------------
        ;// Load the nine source rows.  Rows A..E are fetched through pSrc and
        ;// rows F..I through r12, with loads interleaved with the first
        ;// vertical taps.
        ;// ------------------------------------------------------------------
        VLD1        qSrcA01, [pSrc], srcStep        ;// [a0 a1 a2 a3 .. a15]
        ADD         r12, pSrc, srcStep, LSL #2      ;// pSrc is already at row B, so r12 -> row F
        VMOV        dTCoeff5, #5                    ;// U8 lanes = 5
        VMOV        dTCoeff20, #20                  ;// U8 lanes = 20
        VLD1        qSrcF1011, [r12], srcStep
        VLD1        qSrcB23, [pSrc], srcStep        ;// [b0 b1 b2 b3 .. b15]

        VLD1        qSrcG1213, [r12], srcStep
        VADDL       qTempP01, dSrcA0, dSrcF10       ;// P(0..7)  = A + F
        VLD1        qSrcC45, [pSrc], srcStep        ;// [c0 c1 c2 c3 .. c15]
        VADDL       qTempP23, dSrcA1, dSrcF11       ;// P(8..15) = A + F; overwrites row A (Q0)
        VLD1        qSrcD67, [pSrc], srcStep
        VADDL       qTempQ01, dSrcB2, dSrcG12       ;// Q(0..7)  = B + G
        VLD1        qSrcE89, [pSrc], srcStep

        ;// ------------------------------------------------------------------
        ;// Vertical pass.  Each intermediate row is
        ;//   outer + outer' + 20 * (inner + inner') - 5 * (mid + mid')
        ;//   P = A + F + 20 * (C + D) - 5 * (B + E)
        ;//   Q = B + G + 20 * (D + E) - 5 * (C + F)
        ;//   R = C + H + 20 * (E + F) - 5 * (D + G)
        ;//   S = D + I + 20 * (F + G) - 5 * (E + H)
        ;// The t0..t3 markers are the original section labels; the taps of
        ;// neighbouring rows are interleaved across them.
        ;// ------------------------------------------------------------------

        ;//t0
        VMLAL       qTempP01, dSrcC4, dTCoeff20

        VLD1        qSrcH1415, [r12], srcStep

        VMLAL       qTempP23, dSrcC5, dTCoeff20

        VLD1        qSrcI1617, [r12], srcStep       ;// [i0 i1 i2 i3 .. ]

        VMLAL       qTempP01, dSrcD6, dTCoeff20
        VMLAL       qTempQ01, dSrcD6, dTCoeff20
        VMLSL       qTempP23, dSrcB3, dTCoeff5      ;// last read of B(8..15) before Q1 is reused

        VADDL       qTempR01, dSrcC4, dSrcH14       ;// R(0..7)  = C + H

        VMLSL       qTempP01, dSrcB2, dTCoeff5      ;// last read of B(0..7) before Q1 is reused

        VADDL       qTempQ23, dSrcB3, dSrcG13       ;// Q(8..15) = B + G; overwrites row B (Q1)

        VMLAL       qTempP23, dSrcD7, dTCoeff20
        VMLAL       qTempQ01, dSrcE8, dTCoeff20

        VMLSL       qTempP01, dSrcE8, dTCoeff5      ;// P(0..7) complete
        VMLAL       qTempQ23, dSrcD7, dTCoeff20

        VMLSL       qTempP23, dSrcE9, dTCoeff5      ;// P(8..15) complete

        ;//t1

        VMLAL       qTempR01, dSrcE8, dTCoeff20
        VMLSL       qTempQ01, dSrcC4, dTCoeff5      ;// last read of C(0..7) before Q2 is reused
        VMLSL       qTempQ23, dSrcC5, dTCoeff5
        VADDL       qTempR23, dSrcC5, dSrcH15       ;// R(8..15) = C + H; overwrites row C (Q2)

        VMLAL       qTempR01, dSrcF10, dTCoeff20
        VMLSL       qTempQ01, dSrcF10, dTCoeff5     ;// Q(0..7) complete
        VMLAL       qTempQ23, dSrcE9, dTCoeff20
        VMLAL       qTempR23, dSrcE9, dTCoeff20
        VADDL       qTempS01, dSrcD6, dSrcI16       ;// S(0..7)  = D + I


        VMLSL       qTempR01, dSrcD6, dTCoeff5      ;// last read of D(0..7) before Q3 is reused
        VMLSL       qTempQ23, dSrcF11, dTCoeff5     ;// Q(8..15) complete
        VMLSL       qTempR23, dSrcD7, dTCoeff5

        ;//t2
        VADDL       qTempS23, dSrcD7, dSrcI17       ;// S(8..15) = D + I; overwrites row D (Q3)
        VMLAL       qTempS01, dSrcF10, dTCoeff20
        VMLSL       qTempR01, dSrcG12, dTCoeff5     ;// R(0..7) complete
        VMLSL       qTempR23, dSrcG13, dTCoeff5

        VMLAL       qTempS23, dSrcF11, dTCoeff20
        VMLAL       qTempS01, dSrcG12, dTCoeff20
        VEXT        dTempB0, dTempP0, dTempP1, #1   ;// horizontal pass starts early: P(1..4)
        VMLAL       qTempR23, dSrcF11, dTCoeff20    ;// R(8..15) complete


        ;//t3
        VMLAL       qTempS23, dSrcG13, dTCoeff20    ;// last use of the U8 coefficient 20
        VMLSL       qTempS01, dSrcE8, dTCoeff5      ;// last read of E(0..7); D8 is reused as dTmp later
        VEXT        dTempC0, dTempP0, dTempP1, #2   ;// P(2..5)
        VMOV        dCoeff20, #20                   ;// D31 now holds S16 lanes = 20
        VMLSL       qTempS23, dSrcE9, dTCoeff5
        VMLSL       qTempS01, dSrcH14, dTCoeff5     ;// S(0..7) complete
        VEXT        dTempF0, dTempP1, dTempP2, #1   ;// P(5..8); must precede the write to qAcc0 (Q0)
        VEXT        dTempD0, dTempP0, dTempP1, #3   ;// P(3..6)
        VMLSL       qTempS23, dSrcH15, dTCoeff5     ;// S(8..15) complete; last use of the U8 coefficient 5

        ;// ------------------------------------------------------------------
        ;// Horizontal pass.  For each filtered row X and output column i:
        ;//   acc = X(i) + X(i+5) + 20 * (X(i+2) + X(i+3)) - 5 * (X(i+1) + X(i+4))
        ;// dTempB0/dTmp hold X(i+1) + X(i+4), dTempC0 holds X(i+2) + X(i+3),
        ;// dTempF0 holds X(i+5).  Work for consecutive rows is interleaved.
        ;// ------------------------------------------------------------------
        VADDL       qAcc0, dTempP0, dTempF0         ;// acc0 = P(i) + P(i+5); overwrites dTempP2
        VADD        dTempC0, dTempC0, dTempD0       ;// P(i+2) + P(i+3)
        ;//h
        VMOV        dCoeff5, #5                     ;// D30 now holds S16 lanes = 5

        ;// res0
        VADD        dTempB0, dTempB0, dTempP1       ;// P(i+1) + P(i+4)
        VMLAL       qAcc0, dTempC0, dCoeff20
        VEXT        dTempC0, dTempQ0, dTempQ1, #2   ;// Q(2..5)
        VEXT        dTempD0, dTempQ0, dTempQ1, #3   ;// Q(3..6)
        VEXT        dTempF0, dTempQ1, dTempQ2, #1   ;// Q(5..8); must precede the write to qAcc1 (Q1)
        VMLSL       qAcc0, dTempB0, dCoeff5         ;// acc0 complete

        ;// res1
        VEXT        dTempB0, dTempQ0, dTempQ1, #1   ;// Q(1..4)
        VADDL       qAcc1, dTempQ0, dTempF0         ;// acc1 = Q(i) + Q(i+5); overwrites dTempQ2
        VADD        dTempC0, dTempC0, dTempD0       ;// Q(i+2) + Q(i+3)
        VADD        dTempB0, dTempB0, dTempQ1       ;// Q(i+1) + Q(i+4)
        VEXT        dTempD0, dTempR0, dTempR1, #3   ;// R(3..6)
        VMLAL       qAcc1, dTempC0, dCoeff20
        VEXT        dTempF0, dTempR1, dTempR2, #1   ;// R(5..8); must precede the write to qAcc2 (Q2)
        VEXT        dTempC0, dTempR0, dTempR1, #2   ;// R(2..5)
        VEXT        dTmp, dTempR0, dTempR1, #1      ;// R(1..4) in dTmp, because dTempB0 is still pending for acc1
        VADDL       qAcc2, dTempR0, dTempF0         ;// acc2 = R(i) + R(i+5); overwrites dTempR2
        VMLSL       qAcc1, dTempB0, dCoeff5         ;// acc1 complete
;        VEXT        dTempB0, dTempR0, dTempR1, #1  ;// disabled in the original; the dTmp form above is used instead
        VADD        dTempC0, dTempC0, dTempD0       ;// R(i+2) + R(i+3)

        ;// res2
        VADD        dTempB0, dTmp, dTempR1          ;// R(i+1) + R(i+4)
        VEXT        dTempD0, dTempS0, dTempS1, #3   ;// S(3..6)
        VMLAL       qAcc2, dTempC0, dCoeff20
;        VADD        dTempB0, dTempB0, dTempR1      ;// disabled in the original; replaced by the VADD from dTmp above

        ;// res3
        VEXT        dTempC0, dTempS0, dTempS1, #2   ;// S(2..5)
        VEXT        dTempF0, dTempS1, dTempS2, #1   ;// S(5..8); must precede the write to qAcc3 (Q3)
        VADD        dTempC0, dTempC0, dTempD0       ;// S(i+2) + S(i+3)
        VEXT        dTmp, dTempS0, dTempS1, #1      ;// S(1..4)
        VADDL       qAcc3, dTempS0, dTempF0         ;// acc3 = S(i) + S(i+5); overwrites dTempS2
        VMLSL       qAcc2, dTempB0, dCoeff5         ;// acc2 complete
        VMLAL       qAcc3, dTempC0, dCoeff20
        VADD        dTmp, dTmp, dTempS1             ;// S(i+1) + S(i+4)
        VMLSL       qAcc3, dTmp, dCoeff5            ;// acc3 complete

        ;// Round, shift right by 10 and saturate S32 -> U16 (writes D0/D2/D4/D6 only)
        VQRSHRUN    dTempAcc0, qAcc0, #10
        VQRSHRUN    dTempAcc1, qAcc1, #10
        VQRSHRUN    dTempAcc2, qAcc2, #10
        VQRSHRUN    dTempAcc3, qAcc3, #10

        ;// Saturate U16 -> U8.  The whole Q register is narrowed, but only the
        ;// low four bytes of each destination come from the results above.
        VQMOVN      dAcc0, qTAcc0
        VQMOVN      dAcc1, qTAcc1
        VQMOVN      dAcc2, qTAcc2
        VQMOVN      dAcc3, qTAcc3

        M_END

    ENDIF





        END