Home | History | Annotate | Download | only in src
      1 ; **********
      2 ; *
      3 ; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
      4 ; * OpenMAX DL: v1.0.2
      5 ; * Revision:   12290
      6 ; * Date:       Wednesday, April 9, 2008
      7 ; *
      8 ; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ; *
     10 ; *
     11 ; *
     12 ; * Description:
     13 ; * Contains module for DC/AC coefficient prediction
     14 ; *
     15 ; *
     16 ; * Function: omxVCM4P2_PredictReconCoefIntra
     17 ; *
     18 ; * Description:
     19 ; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
     20 ; * to the function call, prediction direction (predDir) should be selected
     21 ; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
     22 ; *
     23 ; * Remarks:
     24 ; *
     25 ; * Parameters:
     26 ; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
     27 ; *                    quantized coefficient residuals (PQF) of the current
     28 ; *                    block; must be aligned on a 4-byte boundary. The
     29 ; *                    output coefficients are saturated to the range
     30 ; *                    [-2048, 2047].
     31 ; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
     32 ; *                    on a 4-byte boundary.
     33 ; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
     34 ; *                    aligned on a 4-byte boundary.
     35 ; * [in]  curQP        quantization parameter of the current block. curQP may
     36 ; *                    be equal to predQP, especially when the current block
     37 ; *                    and the predictor block are in the same macroblock.
     38 ; * [in]  predQP       quantization parameter of the predictor block
     39 ; * [in]  predDir      indicates the prediction direction which takes one
     40 ; *                    of the following values:
     41 ; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
     42 ; *                    OMX_VIDEO_VERTICAL        predict vertically
     43 ; * [in]  ACPredFlag   a flag indicating if AC prediction should be
     44 ; *                    performed. It is equal to ac_pred_flag in the bit
     45 ; *                    stream syntax of MPEG-4
     46 ; * [in]  videoComp    video component type (luminance, chrominance or
     47 ; *                    alpha) of the current block
     48 ; * [out] pSrcDst      pointer to the coefficient buffer which contains
     49 ; *                    the quantized coefficients (QF) of the current
     50 ; *                    block
     51 ; * [out] pPredBufRow  pointer to the updated coefficient row buffer
     52 ; * [out] pPredBufCol  pointer to the updated coefficient column buffer
     53 ; * Return Value:
     54 ; * OMX_Sts_NoErr - no error
     55 ; * OMX_Sts_BadArgErr - Bad arguments
     56 ; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
     57 ; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
     58 ; *   predQP > 31, predDir is outside [1,2].
     59 ; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
     60 ; *   4-byte aligned.
     61 ; *
     62 ; *********
     63 
     64         INCLUDE omxtypes_s.h
     65         INCLUDE armCOMM_s.h
     66 
     67        M_VARIANTS CortexA8
     68 
     69 
     70 
     71        IMPORT        armVCM4P2_Reciprocal_QP_S32
     72        IMPORT        armVCM4P2_Reciprocal_QP_S16
     73        IMPORT        armVCM4P2_DCScaler
     74 
     75         IF CortexA8
     76 ;// Input arguments: r0-r3 are used as they arrive; r4-r7 are loaded from the stack by the routine
     77 
     78 pSrcDst          RN 0                    ;// coefficient buffer of the current block (read and written)
     79 pPredBufRow      RN 1                    ;// row prediction buffer
     80 pPredBufCol      RN 2                    ;// column prediction buffer (post-incremented in the horizontal path)
     81 curQP            RN 3                    ;// QP of the current block; multiplied by 4 in place for the AC table lookup
     82 QP               RN 3                    ;// second name for r3, used for the DC scaler lookup
     83 predQP           RN 4                    ;// QP of the predictor block
     84 predDir          RN 5                    ;// prediction direction; the routine tests it against 2 (vertical)
     85 ACPredFlag       RN 6                    ;// AC prediction runs only when this equals 1
     86 videoComp        RN 7                    ;// video component type; scaled by 32 to index the DC scaler table
     87 
     88 ;// Local variables. Several names share one physical register (r4, r6-r11), so each name is
     89 ;// only valid over part of the routine; check the live ranges before reordering any code.
     90 shortVideoHeader RN 4                    ;// not referenced by the code in this file
     91 dcScaler         RN 4                    ;// shares r4 with predQP
     92 index            RN 6                    ;// shares r6 with ACPredFlag and temp1
     93 predCoeffTable   RN 7                    ;// shares r7 with videoComp
     94 temp1            RN 6                    ;// shares r6 with ACPredFlag and index
     95 temp2            RN 9                    ;// shares r9 with tempPred
     96 temp             RN 14                   ;// r14 (link register) used as scratch; ends up holding the DC result stored at Exit
     97 Const            RN 8                    ;// shares r8 with temppPredColBuf and absCoeffDC
     98 temppPredColBuf  RN 8                    ;// copy of pPredBufCol taken before it is post-incremented
     99 tempPred         RN 9                    ;// DC prediction value
    100 
    101 absCoeffDC       RN 8                    ;// absolute value of the DC predictor
    102 negdcScaler      RN 10                   ;// shares r10 with dcRowbufCoeff
    103 Rem              RN 11                   ;// shares r11 with dcColBuffCoeff
    104 temp3            RN 12                   ;// rounding threshold for the DC division
    105 
    106 dcRowbufCoeff    RN 10                   ;// halfword saved from a prediction buffer so it can be written back
    107 dcColBuffCoeff   RN 11                   ;// clipped DC * dcScaler, stored to pPredBufCol[0]
    108 Return           RN 0                    ;// return status, written at Exit (same register as pSrcDst)
    109 
    110 ;// NEON registers. Qn overlays D(2n) and D(2n+1), so several of the aliases below overlap.
    111 
    112 qPredRowBuf       QN Q0.S16              ;// not referenced by the code in this file
    113 dPredRowBuf0      DN D0.S16              ;// prediction buffer elements 0..3 (row or column buffer)
    114 dPredRowBuf1      DN D1.S16              ;// prediction buffer elements 4..7 (row or column buffer)
    115 
    116 
    117 
    118 ;// Q1 is D2/D3: qCoeffTab occupies the same storage as dtemp4/dtemp5
    119 qCoeffTab         QN Q1.S32              ;// reciprocal table entry for curQP, replicated in every 32-bit lane
    120 
    121 qPredQP           QN Q2.S16              ;// not referenced by the code in this file
    122 dPredQP0          DN D4.S16              ;// predQP replicated in every 16-bit lane (same register as dtemp6)
    123 dPredQP1          DN D5.S16              ;// temporarily holds narrowed predictions 0..3 (same register as dtemp7)
    124 
    125 ;// Q3 is D6/D7: qtemp1 and qtemp are the same register viewed as 4x32-bit and 8x16-bit lanes
    126 qtemp1            QN Q3.S32
    127 qtemp             QN Q3.S16
    128 
    129 dtemp0            DN D6.S16              ;// low half of Q3
    130 dtemp1            DN D7.S16              ;// high half of Q3
    131 
    132 dtemp2            DN D8.S16
    133 dtemp3            DN D9.S16
    134 ;// dtemp4..dtemp7 reuse D2..D5, i.e. the storage of qCoeffTab and dPredQP0/dPredQP1
    135 dtemp4            DN D2.S16
    136 dtemp5            DN D3.S16
    137 dtemp6            DN D4.S16
    138 dtemp7            DN D5.S16
    139 ;// Q5 is D10/D11: qtempPred1 and qtempPred are the same register viewed as 4x32-bit and 8x16-bit lanes
    140 qtempPred1        QN Q5.S32
    141 qtempPred         QN Q5.S16
    142 
    143 dtempPred0        DN D10.S16             ;// low half of Q5: predictions 0..3
    144 dtempPred1        DN D11.S16             ;// high half of Q5: predictions 4..7
    145 ;// omxVCM4P2_PredictReconCoefIntra: rebuilds pSrcDst[0] from the DC predictor and, when ACPredFlag == 1,
    146 ;// adds scaled predictions to pSrcDst[0..7] (vertical) or to every 8th coefficient of pSrcDst (horizontal).
    147 ;// No argument validation is done in this routine: every path reaches Exit and returns OMX_Sts_NoErr.
    148       M_START   omxVCM4P2_PredictReconCoefIntra,r11,d11              ;// entry macro from armCOMM_s.h; r11,d11 presumably bound the registers it preserves - TODO confirm
    149 
    150       ;// Declare names for the four arguments passed on the stack (size 4 each)
    151 
    152       M_ARG           predQPonStack,4
    153       M_ARG           predDironStack,4
    154       M_ARG           ACPredFlagonStack,4
    155       M_ARG           videoComponStack,4
    156 
    157       ;// DC Prediction
    158 
    159       M_LDR           videoComp,videoComponStack                     ;// Load videoComp from the stack
    160 
    161       M_LDR           predDir,predDironStack                         ;// Load the prediction direction from the stack
    162       ;// DC scaler lookup: dcScaler = byte at armVCM4P2_DCScaler + videoComp*32 + QP
    163       LDR             index, =armVCM4P2_DCScaler                     ;// index = table base
    164       ADD             index,index,videoComp,LSL #5                   ;// index += videoComp*32
    165       LDRB            dcScaler,[index,QP]                            ;// dcScaler lives in r4, which is reloaded as predQP after its last use
    166 
    167 
    168       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Halfword table with entries 32767/(1 to 63); overwrites videoComp in r7
    169       CMP             predDir,#2                                     ;// EQ when the prediction direction is vertical
    170 
    171       ;// Calculate tempPred = predictor DC divided by dcScaler, rounded, with the predictor's sign
    172 
    173       LDREQSH         absCoeffDC,[pPredBufRow]                       ;// Vertical: load the signed DC predictor from the row prediction buffer
    174       LDRNESH         absCoeffDC,[pPredBufCol]                       ;// Otherwise: load the signed DC predictor from the column prediction buffer
    175 
    176       RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler = -dcScaler
    177       MOV             temp1,absCoeffDC                               ;// Keep a signed copy of the predictor for the two sign tests
    178       CMP             temp1,#0
    179       RSBLT           absCoeffDC,temp1,#0                            ;// absCoeffDC = absolute value of the predictor
    180 
    181       ADD             temp,dcScaler,dcScaler                         ;// temp = 2*dcScaler: byte offset of the halfword table entry
    182       LDRH            temp,[predCoeffTable,temp]                     ;// temp = table entry for dcScaler, so the division becomes a multiplication
    183       SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred = abs(predictor) * (32767/dcScaler)
    184       ADD             temp3,dcScaler,#1
    185       LSR             tempPred,tempPred,#15                          ;// tempPred = quotient estimate of abs(predictor)/dcScaler
    186       LSR             temp3,temp3,#1                                 ;// temp3 = (dcScaler+1)>>1, the rounding threshold
    187       MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Rem = abs(predictor) - tempPred*dcScaler
    188 
    189       LDRH            dcRowbufCoeff,[pPredBufCol]                    ;// Read pPredBufCol[0] before it is overwritten below (r10 is free: negdcScaler is consumed)
    190 
    191       CMP             Rem,temp3                                      ;// Compare the remainder with the rounding threshold
    192       ADDGE           tempPred,#1                                    ;// tempPred = tempPred+1 if Rem >= threshold
    193       CMP             temp1,#0
    194       RSBLT           tempPred,tempPred,#0                           ;// tempPred = -tempPred if the predictor was negative
    195 
    196       STRH            dcRowbufCoeff,[pPredBufRow,#-16]               ;// Store the old pPredBufCol[0] at the halfword 16 bytes below pPredBufRow
    197 
    198 
    199       LDRH            temp,[pSrcDst]                                 ;// temp = pSrcDst[0], zero-extended; only the low halfword is used below
    200       ADD             temp,temp,tempPred                             ;// temp = pSrcDst[0] + tempPred
    201       SSAT16          temp,#12,temp                                  ;// Saturate each halfword of temp to signed 12 bits: [-2048,2047]
    202       SMULBB          dcColBuffCoeff,temp,dcScaler                   ;// dcColBuffCoeff = clipped DC * dcScaler (bottom halfwords)
    203       M_LDR           ACPredFlag,ACPredFlagonStack                   ;// Load ACPredFlag from the stack; replaces temp1 in r6
    204       STRH            dcColBuffCoeff,[pPredBufCol]                   ;// pPredBufCol[0] = low halfword of clipped DC * dcScaler
    205 
    206 
    207        ;// AC Prediction
    208 
    209       M_LDR           predQP,predQPonStack                           ;// Load predQP from the stack; replaces dcScaler in r4
    210 
    211       CMP             ACPredFlag,#1                                  ;// Check whether the AC prediction flag is set
    212       BNE             Exit                                           ;// If not set, only the DC result is written
    213       CMP             predDir,#2                                     ;// Flags are consumed by BNE Horizontal below; nothing in between sets flags
    214       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Word table with entries 0x1ffff/(1 to 63)
    215       MOV             Const,#4
    216       MUL             curQP,curQP,Const                              ;// curQP = 4*curQP: byte offset of the word table entry
    217       VDUP            dPredQP0,predQP                                ;// predQP replicated into the four 16-bit lanes
    218       LDR             temp2,[predCoeffTable,curQP]                   ;// temp2 = 0x1ffff/curQP
    219       VDUP            qCoeffTab,temp2                                ;// Table entry replicated into the four 32-bit lanes
    220       BNE             Horizontal                                     ;// If the prediction direction is not vertical, branch to Horizontal
    221 
    222 
    223 
    224       ;// Vertical
    225       ;// Calculating tempPred[i] = round(pPredBufRow[i]*predQP/curQP) for i=0 to 7
    226 
    227       VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]: i=0 to 7
    228 
    229       VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*predQP (widened to 32 bits): i=0 to 3
    230       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*predQP*(0x1ffff/curQP): i=0 to 3
    231 
    232       VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufRow[i]*predQP (widened to 32 bits): i=4 to 7
    233 
    234       VRSHR           qtempPred1,qtempPred1,#17                      ;// Rounding shift: qtempPred1[i]=round(pPredBufRow[i]*predQP/curQP): i=0 to 3
    235       VSHRN           dPredQP1,qtempPred1,#0                         ;// Narrow to 16 bits; results for i=0 to 3 are parked in dPredQP1
    236 
    237 
    238       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufRow[i]*predQP*(0x1ffff/curQP): i=4 to 7
    239       VRSHR           qtempPred1,qtempPred1,#17                      ;// Rounding shift: qtempPred1[i]=round(pPredBufRow[i]*predQP/curQP): i=4 to 7
    240       VLD1            {dtemp0,dtemp1},[pSrcDst]                      ;// Loading pSrcDst[i]: i=0 to 7 (reuses Q3 now that qtemp1 is consumed)
    241       VSHRN           dtempPred1,qtempPred1,#0                       ;// Narrow to 16 bits; results for i=4 to 7 go to the high half of qtempPred
    242       VMOV            dtempPred0,dPredQP1                            ;// Low half of qtempPred = parked results for i=0 to 3
    243 
    244       ;// Updating source and row prediction buffer contents
    245       VADD            qtemp,qtemp,qtempPred                          ;// qtemp[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
    246       VQSHL           qtemp,qtemp,#4                                 ;// Saturating shift left by 4 ...
    247       LDRH            dcRowbufCoeff,[pPredBufRow]                    ;// Save pPredBufRow[0] before the vector store overwrites it
    248       VSHR            qtemp,qtemp,#4                                 ;// ... then signed shift right by 4: net effect is a clip to [-2048,2047]
    249 
    250       VST1            {dtemp0,dtemp1},[pSrcDst]                      ;// Storing back the updated pSrcDst[i]: i=0 to 7
    251       VST1            {dtemp0,dtemp1},[pPredBufRow]                  ;// Storing the same values as the updated row prediction buffer
    252       STRH            dcRowbufCoeff,[pPredBufRow]                    ;// Restore the saved pPredBufRow[0]
    253 
    254       B               Exit
    255 
    256 Horizontal
    257 
    258       ;// Calculating tempPred[i] = round(pPredBufCol[i]*predQP/curQP) for i=0 to 7
    259 
    260 
    261 
    262       VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]: i=0 to 7
    263       VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*predQP (widened to 32 bits): i=0 to 3
    264       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*predQP*(0x1ffff/curQP): i=0 to 3
    265 
    266       VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;// qtemp1[i]=pPredBufCol[i]*predQP (widened to 32 bits): i=4 to 7
    267 
    268       VRSHR           qtempPred1,qtempPred1,#17                      ;// Rounding shift: qtempPred1[i]=round(pPredBufCol[i]*predQP/curQP): i=0 to 3
    269       VSHRN           dPredQP1,qtempPred1,#0                         ;// Narrow to 16 bits; results for i=0 to 3 are parked in dPredQP1
    270 
    271 
    272       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;// qtempPred1[i]=pPredBufCol[i]*predQP*(0x1ffff/curQP): i=4 to 7
    273 
    274       MOV             temppPredColBuf,pPredBufCol                    ;// Keep the buffer start; pPredBufCol is post-incremented below (r8 is free: Const is consumed)
    275       VRSHR           qtempPred1,qtempPred1,#17                      ;// Rounding shift: qtempPred1[i]=round(pPredBufCol[i]*predQP/curQP): i=4 to 7
    276       VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Loading pSrcDst[0..15] de-interleaved by 4: dtemp0 = elements 0,4,8,12
    277       VSHRN           dtempPred1,qtempPred1,#0                       ;// Narrow to 16 bits; results for i=4 to 7
    278       VMOV            dtempPred0,dPredQP1                            ;// Results for i=0 to 3; must precede the VLD4 below, which overwrites D5
    279 
    280       ;// Updating source and column prediction buffer contents
    281       ADD             temp2,pSrcDst,#32                              ;// temp2 -> pSrcDst[16]
    282       VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading pSrcDst[16..31] de-interleaved by 4 (reuses D2-D5)
    283       VUZP            dtemp0,dtemp4                                  ;// dtemp0 = elements 0,8,16,24; dtemp4 = elements 4,12,20,28
    284       VADD            dtemp0,dtemp0,dtempPred0                       ;// Adding predictions i=0 to 3
    285       VQSHL           dtemp0,dtemp0,#4                               ;// Saturating shift left by 4 ...
    286       VSHR            dtemp0,dtemp0,#4                               ;// ... then signed shift right by 4: clip to [-2048,2047]
    287       VST1            {dtemp0},[pPredBufCol]!                        ;// pPredBufCol[0..3] = updated values; pointer advances by 8 bytes
    288       VZIP            dtemp0,dtemp4                                  ;// Undo the VUZP so the VLD4 layout is restored
    289       VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Storing back pSrcDst[0..15]
    290       VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!         ;// Storing back pSrcDst[16..31]; temp2 advances to pSrcDst[32]
    291 
    292       MOV             temp1,temp2                                    ;// temp1 -> pSrcDst[32], kept for the store-back
    293       VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Loading pSrcDst[32..47] de-interleaved by 4; temp2 advances to pSrcDst[48]
    294 
    295       VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading pSrcDst[48..63] de-interleaved by 4
    296       VUZP            dtemp0,dtemp4                                  ;// dtemp0 = elements 32,40,48,56
    297       VADD            dtemp0,dtemp0,dtempPred1                       ;// Adding predictions i=4 to 7
    298       VQSHL           dtemp0,dtemp0,#4                               ;// Saturating shift left by 4 ...
    299       VSHR            dtemp0,dtemp0,#4                               ;// ... then signed shift right by 4: clip to [-2048,2047]
    300       VST1            {dtemp0},[pPredBufCol]!                        ;// pPredBufCol[4..7] = updated values
    301       VZIP            dtemp0,dtemp4                                  ;// Undo the VUZP so the VLD4 layout is restored
    302       VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]          ;// Storing back pSrcDst[32..47]
    303       STRH            dcColBuffCoeff,[temppPredColBuf]               ;// Restore pPredBufCol[0] = clipped DC * dcScaler from the DC stage
    304       VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Storing back pSrcDst[48..63]
    305 
    306 Exit
    307 
    308       STRH            temp,[pSrcDst]                                 ;// pSrcDst[0] = clipped DC result; temp (r14) is not written by the AC code above
    309 
    310 
    311       MOV             Return,#OMX_Sts_NoErr                          ;// Success is the only status this routine returns
    312 
    313       M_END                                                          ;// exit macro paired with M_START; presumably restores registers and returns - TODO confirm
    314       ENDIF
    315 
    316 
    317        END
    318 
    319 
    320 
    321