Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ; **********
     17 ; *
     18 ; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
     19 ; * OpenMAX DL: v1.0.2
     20 ; * Revision:   12290
     21 ; * Date:       Wednesday, April 9, 2008
     22 ; *
     23 ; *
     24 ; *
     25 ; *
     26 ; * Description:
     27 ; * Contains module for DC/AC coefficient prediction
     28 ; *
     29 ; *
     30 ; * Function: omxVCM4P2_PredictReconCoefIntra
     31 ; *
     32 ; * Description:
     33 ; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
     34 ; * to the function call, prediction direction (predDir) should be selected
     35 ; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
     36 ; *
     37 ; * Remarks:
     38 ; *
     39 ; * Parameters:
     40 ; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
     41 ; *                    quantized coefficient residuals (PQF) of the current
     42 ; *                    block; must be aligned on a 4-byte boundary. The
     43 ; *                    output coefficients are saturated to the range
     44 ; *                    [-2048, 2047].
     45 ; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
     46 ; *                    on a 4-byte boundary.
     47 ; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
     48 ; *                    aligned on a 4-byte boundary.
     49 ; * [in]  curQP        quantization parameter of the current block. curQP may
     50 ; *                    be equal to predQP, especially when the current block
     51 ; *                    and the predictor block are in the same macroblock.
     52 ; * [in]  predQP       quantization parameter of the predictor block
     53 ; * [in]  predDir      indicates the prediction direction which takes one
     54 ; *                    of the following values:
     55 ; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
     56 ; *                    OMX_VIDEO_VERTICAL        predict vertically
     57 ; * [in]  ACPredFlag   a flag indicating if AC prediction should be
     58 ; *                    performed. It is equal to ac_pred_flag in the bit
     59 ; *                    stream syntax of MPEG-4
     60 ; * [in]  videoComp    video component type (luminance, chrominance or
     61 ; *                    alpha) of the current block
     62 ; * [out] pSrcDst      pointer to the coefficient buffer which contains
     63 ; *                    the quantized coefficients (QF) of the current
     64 ; *                    block
     65 ; * [out] pPredBufRow  pointer to the updated coefficient row buffer
     66 ; * [out] pPredBufCol  pointer to the updated coefficient column buffer
     67 ; * Return Value:
     68 ; * OMX_Sts_NoErr - no error
     69 ; * OMX_Sts_BadArgErr - Bad arguments
     70 ; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
     71 ; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
     72 ; *   predQP > 31, predDir is outside [1,2].
     73 ; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
     74 ; *   4-byte aligned.
     75 ; *
     76 ; *********
     77 
     78         INCLUDE omxtypes_s.h
     79         INCLUDE armCOMM_s.h
     80 
     81        M_VARIANTS CortexA8
     82 
     83 
     84 
     85        IMPORT        armVCM4P2_Reciprocal_QP_S32
     86        IMPORT        armVCM4P2_Reciprocal_QP_S16
     87        IMPORT        armVCM4P2_DCScaler
     88 
     89         IF CortexA8
        ;// Register alias table for omxVCM4P2_PredictReconCoefIntra.
        ;// Several aliases below name the SAME physical register; each pair/group is
        ;// only valid while the previous user of that register is no longer needed:
        ;//   r3  : curQP, QP
        ;//   r4  : predQP, shortVideoHeader, dcScaler
        ;//   r6  : ACPredFlag, index, temp1
        ;//   r7  : videoComp, predCoeffTable
        ;//   r8  : Const, temppPredColBuf, absCoeffDC
        ;//   r9  : temp2, tempPred
        ;//   r10 : negdcScaler, dcRowbufCoeff
        ;//   r11 : Rem, dcColBuffCoeff
     90 ;// Input Arguments
        ;// pSrcDst, pPredBufRow, pPredBufCol and curQP are mapped to r0-r3. The other
        ;// four arguments are declared with M_ARG and fetched with M_LDR in the
        ;// function body; the aliases here only name the registers they are loaded into.
     91 
     92 pSrcDst          RN 0
     93 pPredBufRow      RN 1
     94 pPredBufCol      RN 2
     95 curQP            RN 3
     96 QP               RN 3            ;// second name for curQP (same register)
     97 predQP           RN 4
     98 predDir          RN 5
     99 ACPredFlag       RN 6
    100 videoComp        RN 7
    101 
    102 ;// Local Variables
    103 
    104 shortVideoHeader RN 4            ;// not referenced by the function body in this file
    105 dcScaler         RN 4
    106 index            RN 6
    107 predCoeffTable   RN 7
    108 temp1            RN 6
    109 temp2            RN 9
    110 temp             RN 14           ;// r14; NOTE(review): presumably saved/restored by M_START/M_END - confirm
    111 Const            RN 8
    112 temppPredColBuf  RN 8
    113 tempPred         RN 9
    114 
    115 absCoeffDC       RN 8
    116 negdcScaler      RN 10
    117 Rem              RN 11
    118 temp3            RN 12
    119 
    120 dcRowbufCoeff    RN 10
    121 dcColBuffCoeff   RN 11
    122 Return           RN 0            ;// return value shares r0 with pSrcDst
    123 
    124 ;//NEON Registers
        ;// Typed aliases: the .S16/.S32 suffix selects the element type used by the
        ;// instruction that names the alias. Dn aliases overlap the Qn aliases as
        ;// usual (Q0=D0:D1, Q1=D2:D3, Q2=D4:D5, Q3=D6:D7, Q5=D10:D11).
    125 
    126 qPredRowBuf       QN Q0.S16
    127 dPredRowBuf0      DN D0.S16
    128 dPredRowBuf1      DN D1.S16
    129 
    130 
    131 
    132 
    133 qCoeffTab         QN Q1.S32      ;// occupies D2:D3, i.e. the same registers as dtemp4/dtemp5
    134 
    135 qPredQP           QN Q2.S16
    136 dPredQP0          DN D4.S16      ;// same register as dtemp6
    137 dPredQP1          DN D5.S16      ;// same register as dtemp7
    138 
    139 
    140 qtemp1            QN Q3.S32      ;// Q3 viewed as 4 x 32-bit
    141 qtemp             QN Q3.S16      ;// Q3 viewed as 8 x 16-bit (= dtemp0:dtemp1)
    142 
    143 dtemp0            DN D6.S16
    144 dtemp1            DN D7.S16
    145 
    146 dtemp2            DN D8.S16
    147 dtemp3            DN D9.S16
    148 
    149 dtemp4            DN D2.S16
    150 dtemp5            DN D3.S16
    151 dtemp6            DN D4.S16
    152 dtemp7            DN D5.S16
    153 
    154 qtempPred1        QN Q5.S32      ;// Q5 viewed as 4 x 32-bit
    155 qtempPred         QN Q5.S16      ;// Q5 viewed as 8 x 16-bit (= dtempPred0:dtempPred1)
    156 
    157 dtempPred0        DN D10.S16
    158 dtempPred1        DN D11.S16
    159 
    160 
    161 
    162       M_START   omxVCM4P2_PredictReconCoefIntra,r11,d11
              ;// Flow of this routine, as implemented below:
              ;//   1. DC prediction: take the neighbouring DC value (row buffer when
              ;//      predDir==2, column buffer otherwise), divide it by dcScaler with
              ;//      rounding (reciprocal-table multiply plus remainder correction),
              ;//      add it to pSrcDst[0] and saturate to 12 bits. The result is kept
              ;//      in 'temp' and is only written to pSrcDst[0] at Exit.
              ;//   2. If ACPredFlag==1, AC prediction: rescale 8 predictor coefficients
              ;//      by predQP/curQP (reciprocal-table multiply, rounding shift by 17),
              ;//      add them to pSrcDst[0..7] (predDir==2) or to pSrcDst[0],[8],...,[56]
              ;//      (otherwise), saturate to 12 bits and copy the results into the
              ;//      matching prediction buffer, whose entry 0 is then rewritten.
              ;// No argument validation is performed in this code; the only value ever
              ;// written to Return is OMX_Sts_NoErr.
              ;// NOTE(review): M_START/M_ARG/M_LDR/M_END come from armCOMM_s.h (not
              ;// visible here); the 'r11,d11' operands presumably request saving of the
              ;// callee-saved registers up to r11/d11 - confirm against the macros.
    163 
    164       ;// Declaring the stack-passed input arguments (each declared with size 4)
    165 
    166       M_ARG           predQPonStack,4
    167       M_ARG           predDironStack,4
    168       M_ARG           ACPredFlagonStack,4
    169       M_ARG           videoComponStack,4
    170 
    171       ;// DC Prediction
    172 
    173       M_LDR           videoComp,videoComponStack                     ;// Load videoComp From Stack
    174 
    175       M_LDR           predDir,predDironStack                         ;// Load Prediction direction
    176       ;// DC Scaler calculation
    177       LDR             index, =armVCM4P2_DCScaler
    178       ADD             index,index,videoComp,LSL #5                   ;// index=table base + 32*videoComp
    179       LDRB            dcScaler,[index,QP]                            ;// dcScaler=byte at index[QP]
    180 
    181 
    182       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63)
    183       CMP             predDir,#2                                     ;// Check if the Prediction direction is vertical
    184 
    185       ;// Calculate tempPred
    186 
    187       LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical load the coeff from Row Prediction Buffer
    188       LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal load the coeff from column Prediction Buffer
    189 
    190       RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler
    191       MOV             temp1,absCoeffDC                               ;// Keep the signed prediction coeff in temp1 for the sign tests
    192       CMP             temp1,#0
    193       RSBLT           absCoeffDC,temp1,#0                            ;// calculate absolute val of prediction coeff
    194 
    195       ADD             temp,dcScaler,dcScaler                         ;// temp=2*dcScaler: byte offset of halfword entry 'dcScaler'
    196       LDRH            temp,[predCoeffTable,temp]                     ;// Load value from coeff table for performing division using multiplication
    197       SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
    198       ADD             temp3,dcScaler,#1
    199       LSR             tempPred,tempPred,#15                          ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler (quotient estimate)
    200       LSR             temp3,temp3,#1                                 ;// temp3=(dcScaler+1)>>1, i.e. dcScaler/2 rounded up
    201       MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Remainder Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler
    202 
    203       LDRH            dcRowbufCoeff,[pPredBufCol]                    ;// dcRowbufCoeff=pPredBufCol[0], read before it is overwritten below
    204 
    205       CMP             Rem,temp3                                      ;// compare Rem with temp3
    206       ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=temp3
    207       CMP             temp1,#0
    208       RSBLT           tempPred,tempPred,#0                           ;// tempPred=-tempPred if the prediction coeff was negative
    209 
    210       STRH            dcRowbufCoeff,[pPredBufRow,#-16]               ;// store the old pPredBufCol[0] 16 bytes (8 halfwords) before pPredBufRow
    211 
    212 
    213       LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0]
    214       ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
    215       SSAT16          temp,#12,temp                                  ;// clip temp to [-2048,2047]
    216       SMULBB          dcColBuffCoeff,temp,dcScaler                   ;// dcColBuffCoeff=clipped(pSrcDst[0])*dcScaler
    217       M_LDR           ACPredFlag,ACPredFlagonStack                   ;// Load ACPredFlag (reuses the register that held temp1)
    218       STRH            dcColBuffCoeff,[pPredBufCol]                   ;// pPredBufCol[0]=dcColBuffCoeff
    219 
    220 
    221        ;// AC Prediction
    222 
    223       M_LDR           predQP,predQPonStack                           ;// Load predQP (reuses the register that held dcScaler)
    224 
    225       CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
    226       BNE             Exit                                           ;// If not set Exit
    227       CMP             predDir,#2                                     ;// Check the Prediction direction (flags are consumed by the BNE below)
    228       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63)
    229       MOV             Const,#4
    230       MUL             curQP,curQP,Const                              ;// curQP=4*curQP (byte offset of a 32-bit table entry)
    231       VDUP            dPredQP0,predQP                                ;// every lane of dPredQP0 = predQP
    232       LDR             temp2,[predCoeffTable,curQP]                   ;// temp2=0x1ffff/curQP
    233       VDUP            qCoeffTab,temp2                                ;// every lane of qCoeffTab = temp2
    234       BNE             Horizontal                                     ;// If the Prediction direction is horizontal branch to Horizontal
    235 
    236 
    237 
    238       ;// Vertical
    239       ;//Calculating tempPred
    240 
    241       VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]:i=0 to 7
    242 
    243       VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 to 3
    244       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
    245 
    246       VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i] : i=4 to 7
    247 
    248       VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=0 to 3
    249       VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits; results for i=0 to 3 parked in dPredQP1
    250 
    251 
    252       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
    253       VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP)  : i=4 to 7
    254       VLD1            {dtemp0,dtemp1},[pSrcDst]                      ;//Loading pSrcDst[i] : i=0 to 7
    255       VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
    256       VMOV            dtempPred0,dPredQP1                            ;// qtempPred (dtempPred0:dtempPred1) now holds the 8 predictions
    257 
    258       ;//updating source and row prediction buffer contents
    259       VADD            qtemp,qtemp,qtempPred                          ;//pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
    260       VQSHL           qtemp,qtemp,#4                                 ;//Clip to [-2048,2047]: saturating shift left by 4 ...
    261       LDRH            dcRowbufCoeff,[pPredBufRow]                    ;//Loading Dc Value of Row Prediction buffer (before the vector store overwrites it)
    262       VSHR            qtemp,qtemp,#4                                 ;// ... followed by shift right by 4
    263 
    264       VST1            {dtemp0,dtemp1},[pSrcDst]                      ;//storing back the updated values
    265       VST1            {dtemp0,dtemp1},[pPredBufRow]                  ;//storing back the updated row prediction values
    266       STRH            dcRowbufCoeff,[pPredBufRow]                    ;// writing back the DC Row Prediction coeff loaded above (pPredBufRow[0] keeps its value)
    267 
    268       B               Exit
    269 
    270 Horizontal
    271 
    272       ;// Calculating tempPred
    273 
    274 
    275 
    276       VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]:i=0 to 7
    277       VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 to 3
    278       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
    279 
    280       VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i] : i=4 to 7
    281 
    282       VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=0 to 3
    283       VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits; results for i=0 to 3 parked in dPredQP1
    284 
    285 
    286       VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
    287 
    288       MOV             temppPredColBuf,pPredBufCol                    ;// remember the buffer start; pPredBufCol is post-incremented below
    289       VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP)  : i=4 to 7
    290       VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Loading pSrcDst[0..15] de-interleaved by 4: dtemp0=elements 0,4,8,12
    291       VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
    292       VMOV            dtempPred0,dPredQP1                            ;// dtempPred0=predictions 0 to 3, dtempPred1=predictions 4 to 7
    293 
    294       ;// Updating source and column prediction buffer contents
    295       ADD             temp2,pSrcDst,#32                              ;// temp2 -> pSrcDst[16]
    296       VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading pSrcDst[16..31] de-interleaved by 4 (reuses the qCoeffTab/dPredQP registers)
    297       VUZP            dtemp0,dtemp4                                  ;// Unzip: dtemp0=elements 0,8,16,24; dtemp4=elements 4,12,20,28
    298       VADD            dtemp0,dtemp0,dtempPred0                       ;// Adding tempPred to coeffs
    299       VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
    300       VSHR            dtemp0,dtemp0,#4
    301       VST1            {dtemp0},[pPredBufCol]!                        ;// Updating Prediction column buffer entries 0 to 3 (pointer advances)
    302       VZIP            dtemp0,dtemp4                                  ;// Zip: undo the VUZP so both registers are back in VLD4 layout
    303       VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Updating source coeffs pSrcDst[0..15]
    304       VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!         ;// Updating pSrcDst[16..31]; temp2 -> pSrcDst[32]
    305 
    306       MOV             temp1,temp2                                    ;// temp1 -> pSrcDst[32], kept for the store-back below
    307       VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Loading pSrcDst[32..47] de-interleaved by 4; temp2 -> pSrcDst[48]
    308 
    309       VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading pSrcDst[48..63] de-interleaved by 4
    310       VUZP            dtemp0,dtemp4                                  ;// Unzip: dtemp0=elements 32,40,48,56
    311       VADD            dtemp0,dtemp0,dtempPred1                       ;// Adding tempPred (i=4 to 7) to coeffs
    312       VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
    313       VSHR            dtemp0,dtemp0,#4
    314       VST1            {dtemp0},[pPredBufCol]!                        ;// Updating Prediction column buffer entries 4 to 7
    315       VZIP            dtemp0,dtemp4                                  ;// Zip: undo the VUZP
    316       VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]          ;// Updating pSrcDst[32..47]
    317       STRH            dcColBuffCoeff,[temppPredColBuf]               ;// column buffer entry 0 = dcColBuffCoeff from the DC stage (overwritten by the VST1 above)
    318       VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Updating pSrcDst[48..63]
    319 
    320 Exit
    321 
    322       STRH            temp,[pSrcDst]                                 ;// pSrcDst[0]=clipped DC value computed in the DC stage
    323 
    324 
    325       MOV             Return,#OMX_Sts_NoErr
    326 
    327       M_END
    328       ENDIF
    329 
    330 
    331        END
    332 
    333 
    334 
    335