Home | History | Annotate | Download | only in src
      1 ;/**
      2 ; *
      3 ; * File Name:  omxVCM4P2_QuantInvInter_I_s.s
      4 ; * OpenMAX DL: v1.0.2
      5 ; * Revision:   12290
      6 ; * Date:       Wednesday, April 9, 2008
      7 ; *
      8 ; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ; *
     10 ; *
     11 ; *
     12 ; * Description:
     13 ; * Contains modules for inter reconstruction
     14 ; *
     15 ; *
     16 ; *
     17 ; *
     18 ; *
     19 ; * Function: omxVCM4P2_QuantInvInter_I
     20 ; *
     21 ; * Description:
     22 ; * Performs inverse quantization on intra/inter coded block.
     23 ; * This function supports bits_per_pixel = 8. Mismatch control
     24 ; * is performed for the first MPEG-4 mode inverse quantization method.
     25 ; * The output coefficients are clipped to the range: [-2048, 2047].
     26 ; *
     27 ; *
     28 ; * Remarks:
     29 ; *
     30 ; * Parameters:
     31 ; * [in] pSrcDst          pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
     32 ; * [in] QP              quantization parameter (quantiser_scale)
     33 ; * [in] videoComp      (Intra version only.) Video component type of the
     34 ; *                  current block. Takes one of the following flags:
     35 ; *                  OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
     36 ; *                  OMX_VC_ALPHA.
     37 ; * [in] shortVideoHeader a flag indicating presence of short_video_header;
     38 ; *                       shortVideoHeader==1 selects linear intra DC mode,
     39 ; *                  and shortVideoHeader==0 selects nonlinear intra DC mode.
     40 ; * [out]    pSrcDst      pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
     41 ; *
     42 ; * Return Value:
     43 ; * OMX_Sts_NoErr - no error
     44 ; * OMX_Sts_BadArgErr - bad arguments
     45 ; *    - If pSrcDst is NULL or is not 16-byte aligned.
     46 ; *      or
     47 ; *    - If QP <= 0.
     48 ; *      or
     49 ; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
     50 ; *
     51 ; */
     52 
     53    INCLUDE omxtypes_s.h
     54    INCLUDE armCOMM_s.h
     55 
     56    M_VARIANTS CortexA8
     57 
     58      IF CortexA8
     59 
     60 
     61 ;//Input Arguments
     62 pSrcDst            RN 0                    ;// r0: address of the coefficient block, read and written in place
     63 QP                 RN 1                    ;// r1: quantization parameter (decremented in place when even)
     64 
     65 
     66 ;//Local Variables
     67 Count              RN 3                    ;// r3: number of coefficients still to process
     68 doubleQP           RN 4                    ;// r4: 2*QP
     69 Return             RN 0                    ;// r0 again: same register as pSrcDst, reused for the status code
     70 ;// Neon registers
     71 ;// Several names below deliberately share storage: each Qn overlays D(2n) (low half) and D(2n+1) (high half).
     72 ;// Q0 = D0:D1 -- QP1 (QP made odd) and its negation
     73 dQP10              DN D0.S32[0]            ;// 32-bit lane 0 of D0: receives QP1 from the core register
     74 qQP1               QN Q0.S32               ;// QP1 broadcast as four 32-bit lanes
     75 
     76 dQP1               DN D0.S16               ;// QP1 as four 16-bit lanes (low half of Q0)
     77 dMinusQP1          DN D1.S16               ;// -QP1 as four 16-bit lanes (high half of Q0, written after qQP1 is last read)
     78 ;// D2, D3 -- the eight 16-bit input coefficients handled per iteration
     79 dCoeff0            DN D2.S16
     80 dCoeff1            DN D3.S16
     81 ;// Q3 = D6:D7 -- sign term and 32-bit accumulator for coefficients 0..3
     82 qResult0           QN Q3.S32               ;// 32-bit accumulator (same register as qSign0)
     83 dResult0           DN D7.S16               ;// narrowed 16-bit result (high half of Q3)
     84 qSign0             QN Q3.S32               ;// +/-QP1 widened to 32 bits (same register as qResult0)
     85 dSign0             DN D6.S16               ;// sign mask, then +/-QP1 in 16 bits (low half of Q3)
     86 ;// Q4 = D8:D9 -- sign term and 32-bit accumulator for coefficients 4..7
     87 qResult1           QN Q4.S32               ;// 32-bit accumulator (same register as qSign1)
     88 dResult1           DN D8.S16               ;// narrowed 16-bit result (low half of Q4, same register as dSign1)
     89 qSign1             QN Q4.S32               ;// +/-QP1 widened to 32 bits (same register as qResult1)
     90 dSign1             DN D8.S16               ;// sign mask, then +/-QP1 in 16 bits (same register as dResult1)
     91 ;// Q5 = D10:D11 -- 2*QP
     92 d2QP0              DN D10.S32[0]           ;// 32-bit lane 0 of D10: receives 2*QP from the core register
     93 q2QP0              QN Q5.S32               ;// 2*QP broadcast as four 32-bit lanes (only used during setup)
     94 d2QP               DN D10.S16              ;// 2*QP as four 16-bit lanes
     95 ;// Per-lane "coefficient == 0" masks and an all-zero constant
     96 dZero0             DN D11.S16              ;// mask for coefficients 0..3 (high half of Q5, free once d2QP is formed)
     97 dZero1             DN D12.S16              ;// mask for coefficients 4..7
     98 dConst0            DN D13.S16              ;// cleared to zero by the function's setup code
     99 
    100 
    101      M_START omxVCM4P2_QuantInvInter_I,r4,d13
    102 ;// In-place inverse quantization of one block of 64 signed 16-bit coefficients at pSrcDst (r0) using quantizer QP (r1).
    103 ;// Every non-zero coefficient c becomes clip(2*QP*c + sign(c)*QP1, -2048, 2047), with QP1 = QP when QP is odd and QP-1 when QP is even; zero stays zero.
    104 ;// No argument validation is performed in this routine: r0 always returns OMX_Sts_NoErr. All loads and stores stay inside the 128-byte block.
    105          ADD      doubleQP,QP,QP                   ;// doubleQP = 2*QP
    106          VMOV     d2QP0,doubleQP                   ;// Lane 0 of d2QP0 = 2*QP
    107          VDUP     q2QP0,d2QP0                      ;// Broadcast 2*QP to all four 32-bit lanes
    108          TST      QP,#1                            ;// Z is set when QP is even
    109          ;// Coefficients are loaded at the top of each loop iteration, so no load ever reaches past the block
    110          SUBEQ    QP,QP,#1                         ;// If QP is even then QP1=QP-1 else QP1=QP
    111          VMOV     dQP10,QP                         ;// Lane 0 of dQP10 = QP1
    112          MOV      Count,#64                        ;// 64 coefficients to process
    113          VDUP     qQP1,dQP10                       ;// Broadcast QP1 to all four 32-bit lanes
    114          VSHRN    d2QP,q2QP0,#0                    ;// 2*QP truncated to 16 bits
    115          VEOR     dConst0,dConst0,dConst0          ;// dConst0 = 0
    116          VSHRN    dQP1,qQP1,#0                     ;// QP1 truncated to 16 bits
    117          VSUB     dMinusQP1,dConst0,dQP1           ;// dMinusQP1=-QP1
    118 
    119 Loop
    120          VLD1     {dCoeff0,dCoeff1},[pSrcDst]      ;// Load the 8 coefficients of this iteration
    121         ;//Performing Inverse Quantization
    122 
    123          VCLT     dSign0,dCoeff0, #0               ;// dSign0 = all-ones in lanes where Coeff0 < 0
    124          VCLT     dSign1,dCoeff1, #0               ;// dSign1 = all-ones in lanes where Coeff1 < 0
    125          VCEQ     dZero0,dCoeff0,#0                ;// dZero0 = all-ones in lanes where Coeff0 == 0
    126          VBSL     dSign0,dMinusQP1,dQP1            ;// dSign0 = -QP1 if Coeff0< 0 else QP1
    127          VCEQ     dZero1,dCoeff1,#0                ;// dZero1 = all-ones in lanes where Coeff1 == 0
    128          VBSL     dSign1,dMinusQP1,dQP1            ;// dSign1 = -QP1 if Coeff1< 0 else QP1
    129          VMOVL    qSign0,dSign0                    ;// Sign extend +/-QP1 to 32 bits; qSign0 is the same register as qResult0, so this seeds the accumulator
    130          VMOVL    qSign1,dSign1                    ;// Same for coefficients 4..7 (qSign1 is the same register as qResult1)
    131          VMLAL    qResult0,dCoeff0,d2QP            ;// qResult0[i] = (+/-QP1) + Coeff0[i]*2*QP
    132                                                    ;// (lanes whose coefficient is zero are cleared after narrowing)
    133          VMLAL    qResult1,dCoeff1,d2QP            ;// qResult1[i] = (+/-QP1) + Coeff1[i]*2*QP
    134                                                    ;// (lanes whose coefficient is zero are cleared after narrowing)
    135          ;// Clip Result to [-2048,2047]
    136 
    137          VQSHL    qResult0,qResult0,#20            ;// Saturating shift left by 20: 32-bit saturation clips the value to [-2048,2047] (scaled by 2^20)
    138          VQSHL    qResult1,qResult1,#20
    139 
    140          VSHR     qResult0,qResult0,#4             ;// Arithmetic shift right by 4; with the narrowing shift by 16 below this undoes the shift by 20
    141          VSHR     qResult1,qResult1,#4
    142          VSHRN    dResult0,qResult0,#16            ;// Narrow the clipped Value to Halfword
    143          VSHRN    dResult1,qResult1,#16
    144          VBIT     dResult0,dConst0,dZero0          ;// Force the result to 0 in lanes where the input coefficient was 0
    145          VBIT     dResult1,dConst0,dZero1
    146 
    147          VST1     {dResult0,dResult1},[pSrcDst]!   ;// Store the 8 results and advance pSrcDst by 16 bytes
    148          SUBS     Count,Count,#8                   ;// 8 more coefficients done; flags drive the branch below
    149          ;// Nothing is loaded here: after the last store pSrcDst points just past the end of the block
    150 
    151 
    152          BGT      Loop                             ;// Repeat while coefficients remain
    153 
    154          MOV      Return,#OMX_Sts_NoErr            ;// Always report success
    155 
    156 
    157          M_END
    158          ENDIF
    159 
    160 
    161         END
    162 
    163