;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_DeblockingChroma_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8


    IF CortexA8

;// ---------------------------------------------------------------------
;// Core register aliases
;// ---------------------------------------------------------------------
pAlpha          RN  2           ;// address dAlpha is loaded from
pBeta           RN  3           ;// address dBeta is loaded from
pBS             RN  4           ;// not referenced by the code in this file
pThresholds     RN  5           ;// threshold pointer; each routine moves it on by 4
bS3210          RN  6           ;// not referenced by the code in this file

;// ---------------------------------------------------------------------
;// Pixels: p side in D4-D7, q side in D8-D11, unsigned bytes.
;// Only dP_0, dP_1, dQ_0 and dQ_1 are used by the code in this file.
;// ---------------------------------------------------------------------
dP_0            DN  D4.U8
dP_1            DN  D5.U8
dP_2            DN  D6.U8
dP_3            DN  D7.U8
dQ_0            DN  D8.U8
dQ_1            DN  D9.U8
dQ_2            DN  D10.U8
dQ_3            DN  D11.U8

;// ---------------------------------------------------------------------
;// Filtering decision
;// ---------------------------------------------------------------------
dAlpha          DN  D0.U8
dBeta           DN  D2.U8

dFilt           DN  D16.U8
dAqflg          DN  D12.U8
dApflg          DN  D17.U8

dAp0q0          DN  D13.U8

;// ---------------------------------------------------------------------
;// bSLT4 working registers
;// dTCs and dTC are the signed and unsigned views of the same register.
;// ---------------------------------------------------------------------
dTC3210         DN  D18.U8
dTCs            DN  D31.S8
dTC             DN  D31.U8

dMask_0         DN  D14.U8
dMask_1         DN  D15.U8
dMask_4         DN  D26.U16

dTemp           DN  D28.U8
dDummy          DN  D17.U8      ;// same register as dApflg

;// ---------------------------------------------------------------------
;// 16-bit differences and the narrowed delta.
;// qDelta is a second name for qDq0p0; dDelta (D20) is the low half of Q10.
;// ---------------------------------------------------------------------
qDq0p0          QN  Q10.S16
qDp1q1          QN  Q11.S16
qDelta          QN  Q10.S16     ;// shares Q10 with qDq0p0
dDelta          DN  D20.S8

;// ---------------------------------------------------------------------
;// Widened p0/q0 accumulators and their narrowed results.
;// Q14 is D28:D29 and Q12 is D24:D25, so dP_0n is the high half of qP_0n,
;// dQ_0n is the low half of qQ_0n, and dTemp (D28) is the low half of qP_0n.
;// ---------------------------------------------------------------------
qP_0n           QN  Q14.S16
qQ_0n           QN  Q12.S16

dP_0n           DN  D29.U8
dQ_0n           DN  D24.U8

;// ---------------------------------------------------------------------
;// bSGE4 working registers.
;// These reuse D13, D28 and D31, which also carry bSLT4 names above.
;// ---------------------------------------------------------------------
dHSp0q1         DN  D13.U8
dHSq0p1         DN  D31.U8

dBS3210         DN  D28.U16

dP_0t           DN  D13.U8      ;// shares D13 with dHSp0q1
dQ_0t           DN  D31.U8      ;// shares D31 with dHSq0p1

;// The two names below repeat the definitions given earlier (same registers).
dQ_0n           DN  D24.U8
dP_0n           DN  D29.U8

;// Register usage for -
;// armVCM4P10_DeblockingChromabSLT4_unsafe
;//
;// Computes replacement p0/q0 pixels for one chroma edge (the "bSLT4"
;// variant, going by the routine name).  The header text originally here
;// named the Luma routine; the name above is the one given to M_START.
;//
;//     delta = clip(-tC, tC, (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
;//     P0    = clip(0, 255, p0 + delta)
;//     Q0    = clip(0, 255, q0 - delta)
;//
;// Inputs read  - Pixels            - p0: D4, p1: D5, q0: D8, q1: D9
;//              - Filter mask       - filt: D16
;//              - Constants         - dMask_0: D14, dMask_1: D15
;//              - Additional Params - pAlpha: r2, pBeta: r3, pThresholds: r5
;//
;// Outputs      - Pixels            - P0: D29, Q0: D24
;//              - Additional Params - pThresholds: r5 (moved on by 4 bytes)
;//              - D0 / D2 reloaded from [pAlpha] / [pBeta]

;// Registers Corrupted - D18-D31
;// (the instructions below write D0, D2, D18, D20-D25, D28-D29 and D31)
;//
;// M_START / M_END are macros from armCOMM_s.h, which is not shown here.


        M_START armVCM4P10_DeblockingChromabSLT4_unsafe


        ;dTC3210 -18
        ;dTemp-28

        ;// Load four bytes from [pThresholds] into the low word of D18 and
        ;// advance the pointer past them.
        VLD1        d18.U32[0], [pThresholds]! ;here

        ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
        ;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1
        ;// Both lines give the same value: the halving add further down
        ;// performs the final "+ 1, >> 1".

        ;// qDp1q1-11
        ;// qDq0p0-10
        VSUBL       qDp1q1, dP_1, dQ_1          ;// p1 - q1, widened to 16 bits
        VMOV        dTemp, dTC3210              ;// second copy of the bytes for the zip
        VSUBL       qDq0p0, dQ_0, dP_0          ;// q0 - p0, widened to 16 bits
        VSHR        qDp1q1, qDp1q1, #2          ;// (p1 - q1) >> 2
        ;// Both operands hold the same bytes, so after the zip each of the
        ;// four loaded bytes sits in two adjacent lanes of dTC3210
        ;// (presumably one threshold per pair of chroma pixels - TODO confirm).
        VZIP.8      dTC3210, dTemp

        ;// qDelta-qDq0p0-10

        ;// dTC = dTC3210 + dMask_1
        ;// (the comment originally here also added the Ap/Aq flags; this
        ;// code adds only dMask_1)

        ;// dTC3210-18
        ;// dTemp-28
        ;// dTC-31
        ;// Where dFilt bits are clear, take dMask_0's bits instead
        ;// (dMask_0 is set up outside this file; presumably zero - TODO confirm).
        VBIF        dTC3210, dMask_0, dFilt
        VRHADD      qDelta, qDp1q1, qDq0p0      ;// (((p1-q1)>>2) + (q0-p0) + 1) >> 1
        ;// presumably dMask_1 holds 1 per lane, making this tC0 + 1 - TODO confirm
        VADD        dTC, dTC3210, dMask_1
        VQMOVN      dDelta, qDelta              ;// saturating narrow to signed bytes
        ;// dDelta-d20

        ;// dDelta = armClip(-dTC, dTC, dDelta): VMIN against +tC, then VMAX
        ;// against -tC.  The two VLD1s reload dAlpha/dBeta; neither value is
        ;// used again inside this routine.
        VLD1        {dAlpha[]}, [pAlpha]
        VMIN        dDelta, dDelta, dTCs
        VNEG        dTCs, dTCs
        VLD1        {dBeta[]}, [pBeta]
        ;1
        VMAX        dDelta, dDelta, dTCs

        ;// dP_0n - 29
        ;// dQ_0n - 24

        ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
        ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta);

        ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
        ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);

        ;// qP_0n - 14
        ;// qQ_0n - 12

        VMOVL       qP_0n, dP_0                 ;// widen p0 to 16 bits
        VMOVL       qQ_0n, dQ_0                 ;// widen q0 to 16 bits

        ;1
        VADDW       qP_0n, qP_0n, dDelta        ;// p0 + delta
        VSUBW       qQ_0n, qQ_0n, dDelta        ;// q0 - delta

        ;// Saturating narrow to unsigned bytes performs the clip to 0..255.
        VQMOVUN     dP_0n, qP_0n
        VQMOVUN     dQ_0n, qQ_0n

        M_END

;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
;//
;// NOTE(review): the input list below is the original text.  The body of
;// armVCM4P10_DeblockingChromabSGE4_unsafe reads only D4, D5, D8 and D9
;// from it, together with r2, r3 and r5.
;//
;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11
;//        - Filter masks - filt: D16, aqflg: D12, apflg: D17
;//        - Additional
;//          Params - alpha: D0, dMask_1: D15
;//
;// NOTE(review): D0 is written, not read, by the body below (it is reloaded
;// from [pAlpha]) and D15 is not touched.
;//
;// armVCM4P10_DeblockingChromabSGE4_unsafe computes replacement p0/q0 pixels
;// for one chroma edge (the "bSGE4" variant, going by the routine name):
;//
;//     P0 = (2*p1 + p0 + q1 + 2) >> 2
;//     Q0 = (2*q1 + q0 + p1 + 2) >> 2
;//
;// Outputs - Pixels - P0: D13 (dP_0t), Q0: D31 (dQ_0t)
;//         - pThresholds: r5 moved on by 4 bytes
;//         - D0 / D2 reloaded from [pAlpha] / [pBeta]
;// (the text originally here listed P0-P2: D29-D31, Q0-Q2: D24,D25,D28,
;// which the instructions below do not write, apart from D31)

;// Registers Corrupted - D18-D31
;// (the instructions below write D0, D2, D13 and D31)
;//
;// M_START / M_END are macros from armCOMM_s.h, which is not shown here.

        M_START armVCM4P10_DeblockingChromabSGE4_unsafe

        ;dHSq0p1 - 31
        ;dHSp0q1 - 13
        VHADD       dHSp0q1, dP_0, dQ_1         ;// (p0 + q1) >> 1
        VHADD       dHSq0p1, dQ_0, dP_1         ;// (q0 + p1) >> 1

        ;// Reload dAlpha/dBeta and step pThresholds on by 4 bytes without
        ;// reading through it (presumably to keep the pointer in step with
        ;// the bSLT4 routine, which loads 4 bytes with writeback - TODO
        ;// confirm).  None of the three results is used again here.

        ;// dHSp0q1-13
        ;// dP_0t-dHSp0q1-13
        ;// dHSq0p1-31
        ;// dQ_0t-Temp1-31
        VLD1        {dAlpha[]}, [pAlpha]
        ADD         pThresholds, pThresholds, #4
        VLD1        {dBeta[]}, [pBeta]

        ;// A rounding halving add of the half-sum with the remaining pixel
        ;// gives the ">> 2" results; each output overwrites its half-sum.
        VRHADD      dP_0t, dHSp0q1, dP_1        ;// (((p0+q1)>>1) + p1 + 1) >> 1
        VRHADD      dQ_0t, dHSq0p1, dQ_1        ;// (((q0+p1)>>1) + q1 + 1) >> 1

        M_END

    ENDIF   ;// CortexA8

        END