Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   12290
      6 ;// Date:       Wednesday, April 9, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 
     14         INCLUDE omxtypes_s.h
     15         INCLUDE armCOMM_s.h
     16 
     17         IMPORT armVCM4P10_QPDivTable
     18         IMPORT armVCM4P10_VMatrixQPModTable
     19 
     20         M_VARIANTS CortexA8
     21 
     22 
     23 
     24 
     25     IF CortexA8
     26 
     27 ;// ARM Registers
                ;// Symbolic names for the ARM core registers used by this routine.
                ;// Several names deliberately share one physical register (r0, r2, r4,
                ;// r5, r6, r9); each shared register is reused only after its earlier
                ;// role is finished, so instruction order in the function body matters.
     28 ;//--------------------------------------
     29 ;// Declare input registers
     30 ;//--------------------------------------
     31 ppSrc       RN 0                                     ;// in: address of the stream pointer (read on entry, written back after unpacking)
     32 pDst        RN 1                                     ;// in: destination buffer for the four 16-bit coefficients
     33 QP          RN 2                                     ;// in: index into both QP lookup tables
     34 
     35 ;//--------------------------------
     36 ;// Scratch variable for Unpack2x2
     37 ;//--------------------------------
     38 pSrc        RN 9                                     ;// running stream pointer (= *ppSrc)
     39 Value       RN 4                                     ;// decoded coefficient value
     40 Value2      RN 5                                     ;// high byte of a two-byte value
     41 Flag        RN 6                                     ;// current flag byte from the stream
     42 strOffset   RN 7                                     ;// byte offset into pDst for the store
     43 cstOffset   RN 8                                     ;// constant mask (#31) used to form strOffset
     44 
     45 ;//--------------------------------
     46 ;// Scratch variable
     47 ;//--------------------------------
     48 r0w0        RN  3                                    ;// packed pair |c1|c0| loaded from pDst
     49 r0w1        RN  4                                    ;// packed pair |c3|c2| loaded from pDst (same register as Value)
     50 
     51 c0w0        RN  5                                    ;// packed sums of the two rows (same register as Value2 / pQPDivTable)
     52 c1w0        RN  6                                    ;// packed differences of the two rows (same register as Flag / pQPModTable)
     53 
     54 return      RN  0                                    ;// status code returned in r0 (same register as ppSrc)
     55 pQPDivTable RN  5                                    ;// base address of armVCM4P10_QPDivTable
     56 pQPModTable    RN  6                                 ;// base address of armVCM4P10_VMatrixQPModTable
     57 Shift        RN  9                                   ;// table value used as the left-shift amount (same register as pSrc)
     58 Scale        RN  2                                   ;// table value used as the multiplier (same register as QP)
     59 
     60 
     61 
     62 ;// Neon Registers
                ;// dZero and dInvTrCoeff are two typed views of D0: it is first used as
                ;// the zero source, later as the transform output.
                ;// Q1 is the register pair D2:D3, so dDqntCoeff (D2) is the low half of
                ;// qDqntCoeff; D1 (dScale) is not part of Q1.
     63 
     64 dZero       DN  D0.U16                               ;// all-zero vector used to clear pDst
     65 dInvTrCoeff DN  D0.S16                               ;// four inverse-transformed coefficients
     66 dScale      DN  D1.S16                               ;// (Scale << Shift) replicated in every lane
     67 qDqntCoeff  QN  Q1.S32                               ;// 32-bit products coefficient * scale
     68 dDqntCoeff  DN  D2.S16                               ;// products shifted right by 1 and narrowed to 16 bits
     69 
     70 
     71         ;// Write function header
                ;//
                ;// omxVCM4P10_TransformDequantChromaDCFromPair
                ;//
                ;// Three inlined stages:
                ;//   1. Unpack a stream of (Flag, Value) pairs into 16-bit slots of pDst.
                ;//   2. 2x2 inverse transform (butterflies) on the four slots pDst[0..3].
                ;//   3. Multiply each result by (QPModTable[QP] << QPDivTable[QP]),
                ;//      shift right by 1, narrow to 16 bits and store back to pDst.
                ;//
                ;// In:   ppSrc (r0)  address of the stream pointer; *ppSrc is advanced
                ;//                   past every byte consumed
                ;//       pDst  (r1)  destination; its first 8 bytes are zeroed, then
                ;//                   overwritten with the four results
                ;//       QP    (r2)  byte index into both lookup tables
                ;// Out:  return (r0) OMX_Sts_NoErr on every path
                ;//
                ;// Flag byte layout as used below:
                ;//       bits 3:0  index of the destination 16-bit slot
                ;//       bit  4    set => value is two bytes (low byte first), else one byte
                ;//       bit  5    set => this is the last pair
                ;//
                ;// NOTE(review): the store offset is (Flag & 15) << 1, i.e. up to 30 bytes,
                ;// while only 8 bytes of pDst are zeroed and transformed here. A stream whose
                ;// index exceeds 3 would store outside those 8 bytes - confirm that callers
                ;// guarantee a valid stream or a large enough buffer.
                ;// NOTE(review): no argument checks (NULL, alignment, QP range) are performed.
                ;// NOTE(review): the ", r9" operand presumably tells M_START which core
                ;// registers to preserve (r4-r9 are written below) - confirm in armCOMM_s.h.
     72         M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
     73 
     74         LDR     pSrc, [ppSrc]                        ;// pSrc = *ppSrc
     75         VMOV    dZero, #0                            ;// D0 = 0 in every 16-bit lane
     76         MOV     cstOffset, #31                       ;// Mask 0x1F, ANDed with (Flag << 1) in the loop to give (Flag & 15) << 1
     77 
     78         ;//-----------------------------------------------------------------------
     79         ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
     80         ;//-----------------------------------------------------------------------
     81 
     82         VST1    dZero,[pDst]                         ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
     83         LDRB     Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>; pSrc += 1
     84 
     85 
                ;// Loop invariant: Flag holds the flag byte of the pair being decoded and
                ;// pSrc points at the first byte of its value.
     86 unpackLoop
     87         TST      Flag,  #0x10                        ;// NE => two-byte value follows, EQ => one-byte value
     88         LDRSBNE  Value2,[pSrc,#1]                    ;// Two-byte case: high byte, sign-extended
     89         LDRBNE   Value, [pSrc], #2                   ;// Two-byte case: low byte, zero-extended; pSrc += 2. Load byte wise to avoid unaligned access
     90         AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1; no S suffix, so the TST flags stay valid
     91         LDRSBEQ  Value, [pSrc], #1                   ;// One-byte case: Value = sign-extended byte; pSrc += 1
     92         ORRNE    Value,Value,Value2, LSL #8          ;// Two-byte case: Value = low | (high << 8)
     93 
     94         TST      Flag,  #0x20                        ;// Computing (Flag & 0x20) to check, if we're done (NE => last pair)
     95         LDRBEQ   Flag,  [pSrc], #1                   ;// Not done: fetch the next flag byte; strOffset was already derived from the old one
     96         STRH     Value, [pDst, strOffset]            ;// Store low 16 bits of <Value> at offset <strOffset>; leaves the flags untouched
     97         BEQ      unpackLoop                          ;// Branch to the loop beginning while bit 5 was clear
     98 
     99         ;//--------------------------------------------------
    100         ;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
    101         ;//--------------------------------------------------
    102 
    103         LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2| (high|low halfword)
    104 
    105         STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer; must precede the load into Shift, which reuses r9
    106 
    107         LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
    108         LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
    109 
    110         SADDSUBX r0w0, r0w0,  r0w0                   ;// [ c00+c01, c00-c01 ]  high = sum, low = difference
    111         SADDSUBX r0w1, r0w1,  r0w1                   ;// [ c10+c11, c10-c11 ]  high = sum, low = difference
    112 
    113         LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]; must come first, the next load overwrites QP
    114         LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP]; Scale and QP are both r2
    115 
    116         SADD16   c0w0, r0w0, r0w1                    ;// [ d00+d10, d01+d11 ]  (table pointer in r5 is no longer needed)
    117         SSUB16   c1w0, r0w0, r0w1                    ;// [ d00-d10, d01-d11 ]  (table pointer in r6 is no longer needed)
    118 
    119         ;//-------------------------------------------------
    120         ;//DequantChromaDC2x2: Inlined (Neon Implementation)
    121         ;//-------------------------------------------------
    122 
    123         LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift
    124         VMOV     dInvTrCoeff, c0w0, c1w0             ;// D0 = { c0w0 (low word), c1w0 (high word) }
    125         VREV32   dInvTrCoeff,dInvTrCoeff             ;// Swap the halfwords in each word: lanes become sum term first, difference term second
    126         VDUP     dScale,Scale                        ;// Replicate the low 16 bits of Scale into every lane
    127 
    128         VMULL    qDqntCoeff,dInvTrCoeff,dScale       ;// Widening multiply: four 32-bit products
    129         VSHRN    dDqntCoeff,qDqntCoeff,#1            ;// (product >> 1), keeping the low 16 bits of each lane
    130 
    131 
    132         VST1     dDqntCoeff,[pDst]                   ;// Storing all the coefficients at once
    133 
    134         MOV      return, #OMX_Sts_NoErr              ;// r0 = success status
    135         M_END
    136 
    137     ENDIF ;// CortexA8
    138 
    139 
    140     END
    141