;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;// ---------------------------------------------------------------------------
;// Syntax : ARM armasm (RVCT).  Symbols and labels start in column 0,
;//          directives and instructions are indented.
;// Target : ARM + NEON; the body is only assembled for the CortexA8 variant.
;//
;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
;//
;// In-place deblocking of two horizontal edges, 8 pixels (one D register) wide.
;//
;// In  : r0        pSrcDst      pointer to the first row below the first edge
;//       r1        srcdstStep   distance in bytes between rows
;//       r2        pAlpha       alpha bytes; [0] is used first, then [1]
;//       r3        pBeta        beta bytes;  [0] is used first, then [1]
;//       [stack]   pThresholds  fetched through M_ARG ppThresholds
;//       [stack]   pBS          fetched through M_ARG ppBS
;// Out : r0 = OMX_Sts_NoErr on every exit path.
;//
;// The loop body runs exactly twice (see LOOP_COUNT).  Per pass it:
;//   * loads four bS bytes from pBS, then advances pBS by 8;
;//   * loads rows p2, p1, p0, q0, q1, q2 around the current edge;
;//   * if all four bS bytes are zero, skips to NoFilterBS0;
;//   * otherwise builds the filter masks, calls the external helpers and
;//     stores new p0 / q0 rows.
;// After either path pSrcDst is one row below the pass's starting row plus
;// three, i.e. the second pass handles the edge four rows below the first.
;//
;// NOTE(review): the instruction order below is hand-scheduled and the two
;// imported *_unsafe helpers communicate through fixed physical registers.
;// Do not reorder instructions or renumber the D-register aliases without
;// checking the helper sources.
;// ---------------------------------------------------------------------------


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        IF CortexA8

        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe

;// XY is doubled by ADDS once per pass:
;//   0x40000000 -> 0x80000000 (Z clear, loop again) -> 0 (Z set, exit)
;// so the loop executes exactly two passes.
LOOP_COUNT  EQU 0x40000000
;// 0x03 in every byte: tests bits [1:0] of each of the four packed bS bytes.
MASK_3      EQU 0x03030303
;// 0x04 in every byte: tests bit 2 of each of the four packed bS bytes.
MASK_4      EQU 0x04040404

;// Function arguments

pSrcDst     RN 0
srcdstStep  RN 1
pAlpha      RN 2
pBeta       RN 3

pThresholds RN 5
pBS         RN 4
bS3210      RN 6                ;// four bS bytes packed in one word

;// Loop

XY          RN 7                ;// shift-out loop counter, see LOOP_COUNT

;// Pixels (one row of 8 bytes each)
dP_0        DN D4.U8
dP_1        DN D5.U8
dP_2        DN D6.U8
dQ_0        DN D8.U8
dQ_1        DN D9.U8
dQ_2        DN D10.U8

;// Filtering Decision
dAlpha      DN D0.U8
dBeta       DN D2.U8

dFilt       DN D16.U8
dAqflg      DN D12.U8           ;// shares D12 with dAp1p0
dApflg      DN D17.U8           ;// shares D17 with dAq2q0

dAp0q0      DN D13.U8           ;// shares D13 with dP_0t
dAp1p0      DN D12.U8
dAq1q0      DN D18.U8
dAp2p0      DN D19.U8           ;// shares D19 with dTemp
dAq2q0      DN D17.U8

qBS3210     QN Q13.U16          ;// Q13 = D26 (low half) : D27 (high half)
dBS3210     DN D26
dMask_bs    DN D27
dFilt_bs    DN D26.U16          ;// same register as dBS3210

;// bSLT4
;// dMask_0 / dMask_1 are initialised below but never read in this file;
;// presumably they are inputs to the imported helpers - confirm there.
dMask_0     DN D14.U8
dMask_1     DN D15.U8
dMask_4     DN D1.U16

Mask_4      RN 8
Mask_3      RN 9

dTemp       DN D19.U8           ;// not referenced in this file

;// Result
;// dP_0t/dQ_0t and dP_0n/dQ_0n are only read here, never written; they are
;// presumably produced by the SGE4 and SLT4 helpers respectively - confirm.
dP_0t       DN D13.U8
dQ_0t       DN D31.U8

dP_0n       DN D29.U8
dQ_0n       DN D24.U8


        ;// Function header
        ;// M_START is a project macro (armCOMM_s.h); the r9 / d15 operands
        ;// presumably name the highest registers to preserve - confirm there.
        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15

        ;//Arguments on the stack
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4

        ;// d0-dAlpha_0
        ;// d2-dBeta_0

        ;// Broadcast alpha[0] / beta[0] to all eight lanes.  The write-back
        ;// advances pAlpha / pBeta by one byte, so they now address alpha[1]
        ;// and beta[1] for the later reload.
        ;// Interleaved with that: pSrcDst -= 3 * srcdstStep (2x then 1x) so
        ;// that it addresses row p2 of the first edge.
        VLD1        {dAlpha[]}, [pAlpha]!
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        SUB         pSrcDst, pSrcDst, srcdstStep
        VLD1        {dBeta[]}, [pBeta]!

        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds

        LDR         Mask_3, =MASK_3
        LDR         Mask_4, =MASK_4

        VMOV        dMask_0, #0
        VMOV        dMask_1, #1
        VMOV        dMask_4, #4         ;// 4 in every 16-bit lane

        LDR         XY, =LOOP_COUNT

        ;// p0-p3 - d4-d7
        ;// q0-q3 - d8-d11
LoopY
        ;// Fetch four bS bytes; step pBS by 8 for the second pass.
        LDR         bS3210, [pBS], #8

        ;// Load rows p2, p1, p0, q0, q1, q2; pSrcDst ends 6 rows further on.
        ;// The CMP result is consumed by the BEQ after the loads (the NEON
        ;// instructions in between do not touch the ARM condition flags).
        VLD1        dP_2, [pSrcDst], srcdstStep
        VLD1        dP_1, [pSrcDst], srcdstStep
        CMP         bS3210, #0
        VLD1        dP_0, [pSrcDst], srcdstStep
        VLD1        dQ_0, [pSrcDst], srcdstStep
        VABD        dAp2p0, dP_2, dP_0          ;// |p2 - p0|
        VLD1        dQ_1, [pSrcDst], srcdstStep
        VABD        dAp0q0, dP_0, dQ_0          ;// |p0 - q0|
        VLD1        dQ_2, [pSrcDst], srcdstStep
        BEQ         NoFilterBS0                 ;// all four bS are zero

        VABD        dAp1p0, dP_1, dP_0          ;// |p1 - p0|
        VABD        dAq1q0, dQ_1, dQ_0          ;// |q1 - q0|

        VCGT        dFilt, dAlpha, dAp0q0       ;// alpha > |p0 - q0|
        ;// Only lane 0 of D26 is written; the upper half is don't-care
        ;// because D27 is overwritten by the VCGT two instructions below.
        VMOV.U32    dBS3210[0], bS3210
        VMAX        dAp1p0, dAq1q0, dAp1p0      ;// max(|q1-q0|, |p1-p0|)
        ;// Widen the bS bytes to 16-bit lanes: each bS now spans two pixels.
        VMOVL       qBS3210, dBS3210.U8
        VABD        dAq2q0, dQ_2, dQ_0          ;// |q2 - q0|
        VCGT        dMask_bs.S16, dBS3210.S16, #0   ;// lane mask: bS > 0

        VCGT        dAp1p0, dBeta, dAp1p0       ;// beta > max(|p1-p0|,|q1-q0|)
        VCGT        dAp2p0, dBeta, dAp2p0       ;// beta > |p2 - p0|

        VAND        dFilt, dMask_bs.U8          ;// dFilt &= (bS > 0)

        ;// Flags for the BLNE below: NE if any bS has bit 0 or bit 1 set.
        TST         bS3210, Mask_3

        VCGT        dAq2q0, dBeta, dAq2q0       ;// beta > |q2 - q0|
        VAND        dFilt, dFilt, dAp1p0        ;// final per-pixel filter mask

        ;// Order matters: dAqflg overwrites D12 (dAp1p0, already folded into
        ;// dFilt) and dApflg overwrites D17 (dAq2q0, consumed one line above).
        VAND        dAqflg, dFilt, dAq2q0
        VAND        dApflg, dFilt, dAp2p0

        ;// bS < 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe

        ;// Flags for the next BLNE: NE if any bS has bit 2 set.  The SUB has
        ;// no S suffix and VTST is a NEON op, so the flags survive.
        TST         bS3210, Mask_4

        ;// Rewind from (start + 6 rows) to row p0.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        ;// 16-bit lane mask: all ones where the lane's bS has bit 2 set.
        VTST        dFilt_bs, dFilt_bs, dMask_4

        ;// bS == 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe

        ;// Where bit 2 of bS is set take dP_0t / dQ_0t instead of the values
        ;// already in dP_0n / dQ_0n.
        VBIT        dP_0n, dP_0t, dFilt_bs
        VBIT        dQ_0n, dQ_0t, dFilt_bs

        ;// Where dFilt is clear, keep the original p0 / q0 pixels.
        VBIF        dP_0n, dP_0, dFilt
        VBIF        dQ_0n, dQ_0, dFilt

        ;// Result Storage
        ;// Two post-incremented stores leave pSrcDst at row q1, which is row
        ;// p2 of the edge handled by the second pass.
        VST1        dP_0n, [pSrcDst], srcdstStep
        ADDS        XY, XY, XY                  ;// advance loop counter
        VST1        dQ_0n, [pSrcDst], srcdstStep

        ;// NOTE(review): this path does not itself reload dAlpha / dBeta or
        ;// advance pThresholds, unlike NoFilterBS0.  Presumably the *_unsafe
        ;// helpers do that - confirm against their source.
        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr

        M_EXIT

NoFilterBS0

        ;// Nothing to filter for this edge.  Reload alpha / beta from the
        ;// already-advanced pointers, rewind pSrcDst from (start + 6 rows)
        ;// to (start + 4 rows), skip 4 bytes of thresholds, and loop.
        VLD1        {dAlpha[]}, [pAlpha]
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        ADDS        XY, XY, XY                  ;// advance loop counter
        VLD1        {dBeta[]}, [pBeta]
        ADD         pThresholds, pThresholds, #4
        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr
        M_END

        ENDIF


        END