; **********
; *
; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
; * OpenMAX DL: v1.0.2
; * Revision:   12290
; * Date:       Wednesday, April 9, 2008
; *
; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
; *
; *
; *
; * Description:
; * Contains module for DC/AC coefficient prediction
; *
; *
; * Function: omxVCM4P2_PredictReconCoefIntra
; *
; * Description:
; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
; * to the function call, prediction direction (predDir) should be selected
; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
; *
; * Remarks:
; * Syntax/target: ARM assembler source using RN/QN/DN register aliases;
; * only the CortexA8 variant (ARM + NEON) is provided. M_VARIANTS, M_START,
; * M_ARG, M_LDR and M_END are macros that are not defined in this file
; * (presumably they come from armCOMM_s.h).
; * NOTE(review): the code in this file performs no argument validation and
; * always returns OMX_Sts_NoErr, although OMX_Sts_BadArgErr is documented
; * below.
; *
; * Parameters:
; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
; *                    quantized coefficient residuals (PQF) of the current
; *                    block; must be aligned on a 4-byte boundary. The
; *                    output coefficients are saturated to the range
; *                    [-2048, 2047].
; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
; *                    on a 4-byte boundary.
; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
; *                    aligned on a 4-byte boundary.
; * [in]  curQP        quantization parameter of the current block. curQP may
; *                    be equal to predQP, especially when the current block
; *                    and the predictor block are in the same macroblock.
; * [in]  predQP       quantization parameter of the predictor block
; * [in]  predDir      indicates the prediction direction, which takes one
; *                    of the following values:
; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
; *                    OMX_VIDEO_VERTICAL      predict vertically
; * [in]  ACPredFlag   a flag indicating if AC prediction should be
; *                    performed. It is equal to ac_pred_flag in the bit
; *                    stream syntax of MPEG-4
; * [in]  videoComp    video component type (luminance, chrominance or
; *                    alpha) of the current block
; * [out] pSrcDst      pointer to the coefficient buffer which contains
; *                    the quantized coefficients (QF) of the current
; *                    block
; * [out] pPredBufRow  pointer to the updated coefficient row buffer
; * [out] pPredBufCol  pointer to the updated coefficient column buffer
; * Return Value:
; * OMX_Sts_NoErr - no error
; * OMX_Sts_BadArgErr - Bad arguments
; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
; *   predQP > 31, predDir outside [1,2].
; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
; *   4-byte aligned.
; *
; *********

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8



        IMPORT        armVCM4P2_Reciprocal_QP_S32
        IMPORT        armVCM4P2_Reciprocal_QP_S16
        IMPORT        armVCM4P2_DCScaler

        IF CortexA8
;// Input Arguments
;// r0-r3 hold the first four arguments; predQP, predDir, ACPredFlag and
;// videoComp are fetched from the stack with M_LDR further below.

pSrcDst          RN 0
pPredBufRow      RN 1
pPredBufCol      RN 2
curQP            RN 3
QP               RN 3
predQP           RN 4
predDir          RN 5
ACPredFlag       RN 6
videoComp        RN 7

;// Local Variables
;//
;// WARNING: several aliases share one physical register, so the order of
;// the instructions below is significant:
;//   r4  : predQP / shortVideoHeader / dcScaler
;//   r6  : ACPredFlag / index / temp1
;//   r7  : videoComp / predCoeffTable
;//   r8  : Const / temppPredColBuf / absCoeffDC
;//   r9  : temp2 / tempPred
;//   r10 : negdcScaler / dcRowbufCoeff
;//   r11 : Rem / dcColBuffCoeff
;//   r14 : temp (holds the reconstructed DC value until the store at Exit)
;// shortVideoHeader is declared but not referenced by any instruction in
;// this file.

shortVideoHeader RN 4
dcScaler         RN 4
index            RN 6
predCoeffTable   RN 7
temp1            RN 6
temp2            RN 9
temp             RN 14
Const            RN 8
temppPredColBuf  RN 8
tempPred         RN 9

absCoeffDC       RN 8
negdcScaler      RN 10
Rem              RN 11
temp3            RN 12

dcRowbufCoeff    RN 10
dcColBuffCoeff   RN 11
Return           RN 0

;//NEON Registers
;//
;// The same physical NEON registers are viewed under several aliases and
;// data types (e.g. qtemp1/qtemp are both Q3, qtempPred1/qtempPred are both
;// Q5). dtemp6/dtemp7 are the same registers as dPredQP0/dPredQP1 (D4/D5),
;// and dtemp4/dtemp5 (D2/D3) form the Q register Q1 used for qCoeffTab, so
;// the horizontal path may only load dtemp4..dtemp7 once qCoeffTab and
;// dPredQP0/dPredQP1 are no longer needed.

qPredRowBuf      QN Q0.S16
dPredRowBuf0     DN D0.S16
dPredRowBuf1     DN D1.S16




qCoeffTab        QN Q1.S32

qPredQP          QN Q2.S16
dPredQP0         DN D4.S16
dPredQP1         DN D5.S16


qtemp1           QN Q3.S32
qtemp            QN Q3.S16

dtemp0           DN D6.S16
dtemp1           DN D7.S16

dtemp2           DN D8.S16
dtemp3           DN D9.S16

dtemp4           DN D2.S16
dtemp5           DN D3.S16
dtemp6           DN D4.S16
dtemp7           DN D5.S16

qtempPred1       QN Q5.S32
qtempPred        QN Q5.S16

dtempPred0       DN D10.S16
dtempPred1       DN D11.S16



        ;// Function entry. NOTE(review): the r11/d11 operands presumably tell
        ;// the macro which callee-saved core/NEON registers to preserve -
        ;// confirm against the macro definition.
        M_START omxVCM4P2_PredictReconCoefIntra,r11,d11

        ;// Assigning pointers to Input arguments on Stack
        ;// (one name per stack-passed argument; the second operand is
        ;// presumably the argument size in bytes)

        M_ARG   predQPonStack,4
        M_ARG   predDironStack,4
        M_ARG   ACPredFlagonStack,4
        M_ARG   videoComponStack,4

        ;// DC Prediction

        M_LDR   videoComp,videoComponStack                     ;// Load videoComp from stack

        M_LDR   predDir,predDironStack                         ;// Load prediction direction
        ;// DC Scaler calculation:
        ;// dcScaler = byte at armVCM4P2_DCScaler + videoComp*32 + QP
        LDR     index, =armVCM4P2_DCScaler
        ADD     index,index,videoComp,LSL #5
        LDRB    dcScaler,[index,QP]


        LDR     predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63)
        CMP     predDir,#2                                     ;// Check if the prediction direction is vertical

        ;// Calculate tempPred

        LDREQSH absCoeffDC,[pPredBufRow]                       ;// If vertical, load the coeff from the row prediction buffer
        LDRNESH absCoeffDC,[pPredBufCol]                       ;// If horizontal, load the coeff from the column prediction buffer

        RSB     negdcScaler,dcScaler,#0                        ;// negdcScaler = -dcScaler
        MOV     temp1,absCoeffDC                               ;// Keep the signed prediction coeff in temp1 for the sign tests
        CMP     temp1,#0
        RSBLT   absCoeffDC,temp1,#0                            ;// absCoeffDC = absolute value of the prediction coeff

        ADD     temp,dcScaler,dcScaler                         ;// temp = 2*dcScaler: byte offset of the 16-bit table entry
        LDRH    temp,[predCoeffTable,temp]                     ;// Load reciprocal from the table, to divide by multiplication
        SMULBB  tempPred,temp,absCoeffDC                       ;// tempPred = abs(pPredBufRow(Col)[0])*32767/dcScaler
        ADD     temp3,dcScaler,#1
        LSR     tempPred,tempPred,#15                          ;// tempPred = abs(pPredBufRow(Col)[0])/dcScaler
        LSR     temp3,temp3,#1                                 ;// temp3 = (dcScaler+1)>>1, i.e. dcScaler/2 rounded up
        MLA     Rem,negdcScaler,tempPred,absCoeffDC            ;// Remainder Rem = abs(pPredBufRow(Col)[0]) - tempPred*dcScaler

        LDRH    dcRowbufCoeff,[pPredBufCol]                    ;// Read pPredBufCol[0] before it is overwritten below
                                                               ;// (r10 is free now: negdcScaler was last used by the MLA)

        CMP     Rem,temp3                                      ;// Compare Rem with (dcScaler+1)>>1
        ADDGE   tempPred,#1                                    ;// tempPred = tempPred+1 if Rem >= (dcScaler+1)>>1
        CMP     temp1,#0
        RSBLT   tempPred,tempPred,#0                           ;// tempPred = -tempPred if the prediction coeff was negative

        STRH    dcRowbufCoeff,[pPredBufRow,#-16]               ;// Store the old pPredBufCol[0] 16 bytes (8 halfwords) before
                                                               ;// pPredBufRow. NOTE(review): presumably the DC slot of the
                                                               ;// neighbouring row-buffer entry - confirm with the caller's layout


        LDRH    temp,[pSrcDst]                                 ;// temp = pSrcDst[0] (zero-extended; only the low halfword is used)
        ADD     temp,temp,tempPred                             ;// temp = pSrcDst[0]+tempPred
        SSAT16  temp,#12,temp                                  ;// Clip each halfword of temp to [-2048,2047]
        SMULBB  dcColBuffCoeff,temp,dcScaler                   ;// dcColBuffCoeff = clipped(pSrcDst[0]+tempPred)*dcScaler
        M_LDR   ACPredFlag,ACPredFlagonStack                   ;// r6 (temp1) is dead from here on, so it can hold ACPredFlag
        STRH    dcColBuffCoeff,[pPredBufCol]                   ;// pPredBufCol[0] = low halfword of dcColBuffCoeff
                                                               ;// (temp itself is written to pSrcDst[0] only at Exit)


        ;// AC Prediction

        M_LDR   predQP,predQPonStack                           ;// Overwrites r4: dcScaler is not used after this point

        CMP     ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
        BNE     Exit                                           ;// If not set, Exit
        CMP     predDir,#2                                     ;// Check the prediction direction; the flags are consumed by
                                                               ;// BNE Horizontal below (nothing in between updates the flags)
        LDR     predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63)
        MOV     Const,#4
        MUL     curQP,curQP,Const                              ;// curQP = 4*curQP: byte offset of the 32-bit table entry
        VDUP    dPredQP0,predQP                                ;// Broadcast predQP to every 16-bit lane of dPredQP0
        LDR     temp2,[predCoeffTable,curQP]                   ;// temp2 = 0x1ffff/curQP
        VDUP    qCoeffTab,temp2                                ;// Broadcast the reciprocal to every 32-bit lane of qCoeffTab
        BNE     Horizontal                                     ;// If the prediction direction is horizontal, branch to Horizontal



        ;// Vertical
        ;// Calculating tempPred

        VLD1    {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]: i=0 to 7

        VMULL   qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 to 3
        VMUL    qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP: i=0 to 3

        VMULL   qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=4 to 7

        VRSHR   qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP): i=0 to 3
        VSHRN   dPredQP1,qtempPred1,#0                         ;// Narrow qtempPred1[i] to 16 bits; parked in dPredQP1 (D5)
                                                               ;// because Q5 is reused for lanes 4 to 7


        VMUL    qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP: i=4 to 7
        VRSHR   qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP): i=4 to 7
        VLD1    {dtemp0,dtemp1},[pSrcDst]                      ;// Loading pSrcDst[i]: i=0 to 7 (Q3 is free: qtemp1 already consumed)
        VSHRN   dtempPred1,qtempPred1,#0                       ;// Narrow qtempPred1[i] to 16 bits -> predictions 4 to 7 in D11
        VMOV    dtempPred0,dPredQP1                            ;// Predictions 0 to 3 -> D10, so qtempPred holds all eight

        ;// Updating source and row prediction buffer contents
        VADD    qtemp,qtemp,qtempPred                          ;// pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
        VQSHL   qtemp,qtemp,#4                                 ;// Saturating shift left by 4 ...
        LDRH    dcRowbufCoeff,[pPredBufRow]                    ;// Save the DC value of the row prediction buffer
        VSHR    qtemp,qtemp,#4                                 ;// ... and shift back: clip to [-2048,2047]

        VST1    {dtemp0,dtemp1},[pSrcDst]                      ;// Storing back the updated values (element 0 is rewritten at Exit)
        VST1    {dtemp0,dtemp1},[pPredBufRow]                  ;// Storing back the updated row prediction values
        STRH    dcRowbufCoeff,[pPredBufRow]                    ;// Restore pPredBufRow[0], which the vector store just overwrote

        B       Exit

Horizontal

        ;// Calculating tempPred



        VLD1    {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]: i=0 to 7
        VMULL   qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 to 3
        VMUL    qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP: i=0 to 3

        VMULL   qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=4 to 7

        VRSHR   qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP): i=0 to 3
        VSHRN   dPredQP1,qtempPred1,#0                         ;// Narrow qtempPred1[i] to 16 bits; parked in dPredQP1 (D5)


        VMUL    qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP: i=4 to 7

        MOV     temppPredColBuf,pPredBufCol                    ;// Keep the buffer start; pPredBufCol is post-incremented below
        VRSHR   qtempPred1,qtempPred1,#17                      ;// qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP): i=4 to 7
        VLD4    {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Load pSrcDst[0..15] de-interleaved by 4:
                                                               ;// dtemp0 = elements 0,4,8,12
        VSHRN   dtempPred1,qtempPred1,#0                       ;// Narrow qtempPred1[i] to 16 bits -> predictions 4 to 7
        VMOV    dtempPred0,dPredQP1                            ;// Predictions 0 to 3; D5 may now be reused as dtemp7

        ;// Updating source and column prediction buffer contents
        ADD     temp2,pSrcDst,#32                              ;// temp2 -> pSrcDst[16]
        VLD4    {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Load pSrcDst[16..31] de-interleaved by 4:
                                                               ;// dtemp4 = elements 16,20,24,28
        VUZP    dtemp0,dtemp4                                  ;// dtemp0 = elements 0,8,16,24; dtemp4 = elements 4,12,20,28
        VADD    dtemp0,dtemp0,dtempPred0                       ;// Adding tempPred to coeffs
        VQSHL   dtemp0,dtemp0,#4                               ;// Saturating shift left by 4 ...
        VSHR    dtemp0,dtemp0,#4                               ;// ... and shift back: clip to [-2048,2047]
        VST1    {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer entries 0 to 3
        VZIP    dtemp0,dtemp4                                  ;// Undo the VUZP so both registers are back in VLD4 layout
        VST4    {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Updating source coeffs 0..15
        VST4    {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!         ;// Updating source coeffs 16..31; temp2 -> pSrcDst[32]

        MOV     temp1,temp2                                    ;// temp1 -> pSrcDst[32], kept for the store back below
        VLD4    {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Load pSrcDst[32..47] de-interleaved by 4; temp2 -> pSrcDst[48]

        VLD4    {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Load pSrcDst[48..63] de-interleaved by 4
        VUZP    dtemp0,dtemp4                                  ;// dtemp0 = elements 32,40,48,56
        VADD    dtemp0,dtemp0,dtempPred1                       ;// Adding tempPred (entries 4 to 7) to coeffs
        VQSHL   dtemp0,dtemp0,#4                               ;// Saturating shift left by 4 ...
        VSHR    dtemp0,dtemp0,#4                               ;// ... and shift back: clip to [-2048,2047]
        VST1    {dtemp0},[pPredBufCol]!                        ;// Updating prediction column buffer entries 4 to 7
        VZIP    dtemp0,dtemp4                                  ;// Undo the VUZP so both registers are back in VLD4 layout
        VST4    {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]          ;// Updating source coeffs 32..47
        STRH    dcColBuffCoeff,[temppPredColBuf]               ;// Restore pPredBufCol[0] (DC), overwritten by the first VST1
        VST4    {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Updating source coeffs 48..63

Exit

        STRH    temp,[pSrcDst]                                 ;// pSrcDst[0] = reconstructed DC computed in the DC section;
                                                               ;// also replaces whatever the AC vector stores left in element 0


        MOV     Return,#OMX_Sts_NoErr                          ;// Always reports success (no argument checks in this routine)

        M_END
        ENDIF


        END