Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   12290
     21 ;// Date:       Wednesday, April 9, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27 
     28         INCLUDE omxtypes_s.h
     29         INCLUDE armCOMM_s.h
     30 
     31         IMPORT armVCM4P10_QPDivTable
     32         IMPORT armVCM4P10_VMatrixQPModTable
     33 
     34         M_VARIANTS CortexA8
     35 
     36 
     37 
     38 
     39     IF CortexA8
     40 
     41 ;// ARM Registers (several aliases below deliberately share one physical register; see per-line notes)
     42 ;//--------------------------------------
     43 ;// Declare input registers
     44 ;//--------------------------------------
     45 ppSrc       RN 0    ;// r0: address of the stream pointer; read at entry, written back after unpacking
     46 pDst        RN 1    ;// r1: destination buffer holding four 16-bit coefficients
     47 QP          RN 2    ;// r2: byte index into both QP look-up tables
     48 
     49 ;//--------------------------------
     50 ;// Scratch variable for Unpack2x2
     51 ;//--------------------------------
     52 pSrc        RN 9    ;// r9: current read position in the packed stream
     53 Value       RN 4    ;// r4: coefficient value being unpacked
     54 Value2      RN 5    ;// r5: high byte of a two-byte coefficient value
     55 Flag        RN 6    ;// r6: header byte of the current pair
     56 strOffset   RN 7    ;// r7: byte offset into pDst for the current coefficient
     57 cstOffset   RN 8    ;// r8: holds the constant mask 31 used to derive strOffset
     58 
     59 ;//--------------------------------
     60 ;// Scratch variable (inverse transform and dequantisation; these reuse the unpack registers)
     61 ;//--------------------------------
     62 r0w0        RN  3   ;// r3: first word read back from pDst
     63 r0w1        RN  4   ;// r4: second word read back from pDst (same register as Value)
     64 
     65 c0w0        RN  5   ;// r5: packed sums (same register as Value2 and pQPDivTable)
     66 c1w0        RN  6   ;// r6: packed differences (same register as Flag and pQPModTable)
     67 
     68 return      RN  0   ;// r0: status code returned to the caller (same register as ppSrc)
     69 pQPDivTable RN  5   ;// r5: base address of armVCM4P10_QPDivTable
     70 pQPModTable    RN  6   ;// r6: base address of armVCM4P10_VMatrixQPModTable
     71 Shift        RN  9   ;// r9: byte read from pQPDivTable[QP] (same register as pSrc)
     72 Scale        RN  2   ;// r2: byte read from pQPModTable[QP], later << Shift (same register as QP)
     73 
     74 
     75 
     76 ;// Neon Registers (dZero and dInvTrCoeff are two typed views of D0; dDqntCoeff is the low half of Q1)
     77 
     78 dZero       DN  D0.U16  ;// D0 as four unsigned 16-bit lanes, used to zero pDst
     79 dInvTrCoeff DN  D0.S16  ;// D0 as four signed 16-bit lanes: inverse-transformed coefficients
     80 dScale      DN  D1.S16  ;// D1: the scale factor replicated into every 16-bit lane
     81 qDqntCoeff  QN  Q1.S32  ;// Q1 (D2:D3): four 32-bit products
     82 dDqntCoeff  DN  D2.S16  ;// D2: products narrowed back to 16 bits (overwrites the low half of Q1)
     83 
     84 
     85         ;// Function: omxVCM4P10_TransformDequantChromaDCFromPair
                ;//
                ;// Unpacks a stream of flag/value pairs into the four 16-bit chroma DC
                ;// coefficients at pDst, applies a 2x2 inverse transform to them, multiplies
                ;// each result by (pQPModTable[QP] << pQPDivTable[QP]), shifts right by 1 and
                ;// stores the four 16-bit results back to pDst.
                ;//
                ;// Inputs:   r0 = ppSrc  address of the stream pointer (advanced past the
                ;//                       consumed pairs before returning)
                ;//           r1 = pDst   destination for four 16-bit coefficients
                ;//           r2 = QP     byte index into both look-up tables
                ;// Returns:  r0 = OMX_Sts_NoErr
                ;//
                ;// Pair format, as decoded by <unpackLoop>:
                ;//   Flag bits 0-3 : coefficient index (store offset = index * 2 bytes)
                ;//   Flag bit 4    : set   -> value is two bytes, low byte first
                ;//                   clear -> value is one signed byte
                ;//   Flag bit 5    : set   -> this is the last pair
                ;//
                ;// NOTE(review): the 4-bit index is not range-checked; an index above 3 would
                ;// store outside the 8 bytes zeroed below -- confirm callers guarantee this.
                ;// NOTE(review): M_START/M_END come from armCOMM_s.h (not shown here); the "r9"
                ;// argument presumably requests that r4-r9 be preserved -- confirm there.
     86         M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
     87 
     88         LDR     pSrc, [ppSrc]                        ;// pSrc = *ppSrc
     89         VMOV    dZero, #0                            ;// D0 = 0 in every lane
     90         MOV     cstOffset, #31                       ;// Mask used in the loop to turn Flag into a store offset
     91 
     92         ;//-----------------------------------------------------------------------
     93         ;// First, zero all four coefficients in the <pDst> buffer
     94         ;//-----------------------------------------------------------------------
     95 
     96         VST1    dZero,[pDst]                         ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
     97         LDRB     Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>
     98 
     99 
    100 unpackLoop
    101         TST      Flag,  #0x10                        ;// Z clear if bit 4 set (two-byte value), Z set otherwise
    102         LDRSBNE  Value2,[pSrc,#1]                    ;// Two-byte case: sign-extended high byte
    103         LDRBNE   Value, [pSrc], #2                   ;// Two-byte case: low byte, pSrc += 2 (bytewise to avoid unaligned access)
    104         AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1; no S suffix, so flags are untouched
    105         LDRSBEQ  Value, [pSrc], #1                   ;// One-byte case: Value = (OMX_S8) *pSrc++
    106         ORRNE    Value,Value,Value2, LSL #8          ;// Two-byte case: Value = low byte | (signed high byte << 8)
    107 
    108         TST      Flag,  #0x20                        ;// Z set if bit 5 is clear, i.e. more pairs follow
    109         LDRBEQ   Flag,  [pSrc], #1                   ;// Flag  = (OMX_U8) *pSrc++, for next iteration (skipped on the last pair)
    110         STRH     Value, [pDst, strOffset]            ;// Store low 16 bits of <Value> at offset <strOffset>; does not change flags
    111         BEQ      unpackLoop                          ;// Loop while the TST #0x20 above found bit 5 clear
    112 
    113         ;//--------------------------------------------------
    114         ;// InvTransformDC2x2: inlined, using the ARMv6 SIMD instructions SADDSUBX/SADD16/SSUB16
    115         ;//--------------------------------------------------
    116 
    117         LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
    118 
    119         STR      pSrc, [ppSrc]                       ;// *ppSrc = pSrc: publish the advanced stream pointer
    120 
    121         LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer (Flag/Value2 registers are free now)
    122         LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
    123 
    124         SADDSUBX r0w0, r0w0,  r0w0                   ;// [ c00+c01, c00-c01 ] = [ d00, d01 ] (sum in the top halfword)
    125         SADDSUBX r0w1, r0w1,  r0w1                   ;// [ c10+c11, c10-c11 ] = [ d10, d11 ]
    126 
    127         LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]; reuses r9, pSrc was already written back
    128         LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP]; overwrites QP, which is not needed again
    129 
    130         SADD16   c0w0, r0w0, r0w1                    ;// [ d00+d10, d01+d11 ]; overwrites pQPDivTable (already used)
    131         SSUB16   c1w0, r0w0, r0w1                    ;// [ d00-d10, d01-d11 ]; overwrites pQPModTable (already used)
    132 
    133         ;//-------------------------------------------------
    134         ;// DequantChromaDC2x2: Inlined (Neon Implementation)
    135         ;//-------------------------------------------------
    136 
    137         LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift
    138         VMOV     dInvTrCoeff, c0w0, c1w0             ;// D0 low word = c0w0, high word = c1w0
    139         VREV32   dInvTrCoeff,dInvTrCoeff             ;// Swap the 16-bit halves of each word: lanes = d00+d10, d01+d11, d00-d10, d01-d11
    140         VDUP     dScale,Scale                        ;// Replicate the low 16 bits of Scale into all four lanes
    141 
    142         VMULL    qDqntCoeff,dInvTrCoeff,dScale       ;// Four signed 16x16 -> 32-bit products
    143         VSHRN    dDqntCoeff,qDqntCoeff,#1            ;// Shift each product right by 1 and narrow to 16 bits
    144 
    145 
    146         VST1     dDqntCoeff,[pDst]                   ;// Storing all the coefficients at once
    147 
    148         MOV      return, #OMX_Sts_NoErr              ;// r0 = status code; this routine has no error path
    149         M_END
    150 
    151     ENDIF ;// CortexA8
    152 
    153 
    154     END
    155