Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   12290
      6 ;// Date:       Wednesday, April 9, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13         INCLUDE omxtypes_s.h
     14         INCLUDE armCOMM_s.h
     15 
     16         M_VARIANTS CortexA8
     17 
     18         IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
     19         IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
     20 
     21         IF CortexA8
     22 
     23 LOOP_COUNT  EQU 0x55000000      ;// Seed for the XY loop counter (top byte = 01010101b)
                ;// The function doubles XY with ADDS once per 8-byte column. The bit
                ;// shifted out (carry) ends the inner LoopX, and a zero result ends the
                ;// outer LoopY, so this seed gives 4 outer passes of 2 inner passes each.
     24 
     25 
     26 ;// Function arguments (passed in r0-r3)
     27 
     28 pSrcDst     RN 0                ;// r0: pixel pointer, read and written in place
     29 srcdstStep  RN 1                ;// r1: byte offset between consecutive rows
     30 pAlpha      RN 2                ;// r2: pointer to the alpha values (two bytes are read)
     31 pBeta       RN 3                ;// r3: pointer to the beta values (two bytes are read)
     32 
                ;// Pointers fetched from the stack arguments ppThresholds / ppBS
     33 pThresholds RN 5
     34 pBS         RN 4
     35 bS10        RN 12               ;// halfword read from pBS on every inner-loop pass
     36 
                ;// pAlpha_0 / pBeta_0 are the same registers as pAlpha / pBeta (r2 / r3)
     37 pAlpha_0    RN 2
     38 pBeta_0     RN 3
     39 
                ;// Set by the function to pAlpha_0 + 1 and pBeta_0 + 1
     40 pAlpha_1    RN 7
     41 pBeta_1     RN 8
     42 
     43 
     44 
     45 ;// Loop control
     46 
     47 XY          RN 9                ;// shift-register style loop counter, seeded with LOOP_COUNT
     48 
     49 pTmp        RN 6                ;// second row pointer (pSrcDst + srcdstStep) / saved row base
     50 step        RN 10               ;// 2 * srcdstStep
     51 
     52 ;// Pixels: one D register (8 bytes) per row, p3..p0 then q0..q3
     53 dP_0        DN D4.U8
     54 dP_1        DN D5.U8
     55 dP_2        DN D6.U8
     56 dP_3        DN D7.U8
     57 dQ_0        DN D8.U8
     58 dQ_1        DN D9.U8
     59 dQ_2        DN D10.U8
     60 dQ_3        DN D11.U8
     61 
     62 
     63 ;// Filtering Decision
     64 dAlpha      DN D0.U8            ;// alpha replicated to all 8 bytes
     65 dBeta       DN D2.U8            ;// beta replicated to all 8 bytes
     66 
     67 dFilt       DN D16.U8           ;// per-byte "filter this pixel" mask
     68 dAqflg      DN D12.U8           ;// shares D12 with dAp1p0
     69 dApflg      DN D17.U8           ;// shares D17 with dAq2q0
     70 
                ;// Absolute differences, e.g. dAp0q0 = |p0 - q0|
     71 dAp0q0      DN D13.U8
     72 dAp1p0      DN D12.U8
     73 dAq1q0      DN D18.U8
     74 dAp2p0      DN D19.U8
     75 dAq2q0      DN D17.U8
     76 
     77 ;// bSLT4
                ;// NOTE(review): the aliases from here down to dHSq0p1 are not referenced
                ;// by the code in this file; presumably they mirror the register use of the
                ;// imported armVCM4P10_DeblockingLumabS*_unsafe helpers - confirm there.
     78 dTC0        DN D18.U8
     79 dTC1        DN D19.U8
     80 dTC01       DN D18.U8
     81 
     82 dTCs        DN D31.S8
     83 dTC         DN D31.U8
     84 
     85 dMask_0     DN D14.U8           ;// set to 0 in every byte by the function
     86 dMask_1     DN D15.U8           ;// set to 1 in every byte by the function
     87 
     88 Mask_0      RN 11               ;// core register holding 0, used to clear dFilt lanes
     89 
     90 dTemp       DN D19.U8
     91 
     92 ;// Computing P0,Q0
     93 qDq0p0      QN Q10.S16
     94 qDp1q1      QN Q11.S16
     95 qDelta      QN Q10.S16  ;// same register as qDq0p0
     96 dDelta      DN D20.S8   ;// D20 is the low half of Q10
     97 
     98 
     99 ;// Computing P1,Q1
    100 dRp0q0      DN D24.U8
    101 
    102 dMaxP       DN D23.U8
    103 dMinP       DN D22.U8
    104 
    105 dMaxQ       DN D19.U8
    106 dMinQ       DN D21.U8
    107 
    108 dDeltaP     DN D26.U8
    109 dDeltaQ     DN D27.U8
    110 
    111 qP_0n       QN Q14.S16
    112 qQ_0n       QN Q12.S16
    113 
                ;// Filtered outputs that the function stores back after the helper call
    114 dQ_0n       DN D24.U8
    115 dQ_1n       DN D25.U8
    116 dP_0n       DN D29.U8
    117 dP_1n       DN D30.U8
    118 
    119 ;// bSGE4
    120 
    121 qSp0q0      QN Q10.U16
    122 
    123 qSp2q1      QN Q11.U16
    124 qSp0q0p1    QN Q12.U16
    125 qSp3p2      QN Q13.U16
    126 dHSp0q1     DN D28.U8
    127 
    128 qSq2p1      QN Q11.U16
    129 qSp0q0q1    QN Q12.U16
    130 qSq3q2      QN Q13.U16  ;// same register as qSp3p2
    131 dHSq0p1     DN D28.U8   ;// same register as dHSp0q1
    132 
    133 qTemp1      QN Q11.U16  ;// same register as qSp2q1
    134 qTemp2      QN Q12.U16  ;// same register as qSp0q0p1
    135 
    136 dP_0t       DN D28.U8   ;// same register as dHSp0q1
    137 dQ_0t       DN D22.U8   ;// D22 is the low half of Q11 (qTemp1)
    138 
                ;// dP_0n, dP_1n, dQ_0n and dQ_1n repeat the definitions made above
                ;// with identical register numbers.
    139 dP_0n       DN D29.U8
    140 dP_1n       DN D30.U8
    141 dP_2n       DN D31.U8   ;// same register as dTC / dTCs
    142 
    143 dQ_0n       DN D24.U8   ;// D24 is the low half of Q12 (qTemp2)
    144 dQ_1n       DN D25.U8   ;// D25 is the high half of Q12 (qTemp2)
    145 dQ_2n       DN D28.U8   ;// same register as dP_0t / dHSp0q1 (D28), not dQ_0t (D22)
    146 
    147 
    148         ;// Function header
                ;//
                ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
                ;//
                ;// In-place deblocking across horizontal edges. For each edge, four rows
                ;// above it (p3..p0) and four rows below it (q0..q3) are read.
                ;//
                ;// In:   r0 = pSrcDst     pointer to the first q0 row
                ;//       r1 = srcdstStep  byte offset between rows
                ;//       r2 = pAlpha      alpha[0] is used for the first edge, alpha[1] after it
                ;//       r3 = pBeta       beta[0] is used for the first edge, beta[1] after it
                ;//       stack: ppThresholds, ppBS (4 bytes each)
                ;// Out:  r0 = OMX_Sts_NoErr
                ;//
                ;// Loop shape (driven by LOOP_COUNT in XY): 4 outer passes (LoopY, one per
                ;// edge, 4 rows apart) of 2 inner passes (LoopX, 8 bytes wide each). Each
                ;// inner pass consumes one halfword from pBS.
                ;//
                ;// NOTE(review): M_START/M_ARG/M_LDR/M_END come from armCOMM_s.h. The
                ;// "r11, d15" operands presumably name the highest core / NEON registers the
                ;// macro has to preserve - confirm against the macro definition.
    149         M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15
    150 
    151         ;// Arguments on the stack
    152         M_ARG   ppThresholds, 4
    153         M_ARG   ppBS, 4
    154 
    155         ;// d0 = dAlpha, loaded from pAlpha_0
    156         ;// d2 = dBeta,  loaded from pBeta_0
    157 
    158         ADD         pAlpha_1, pAlpha_0, #1          ;// address of the second alpha byte
    159         ADD         pBeta_1, pBeta_0, #1            ;// address of the second beta byte
    160 
    161         VLD1        {dAlpha[]}, [pAlpha_0]          ;// replicate alpha[0] to all lanes
    162         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2    ;// back up 4 rows: "row 0" = p3
    163         VLD1        {dBeta[]}, [pBeta_0]            ;// replicate beta[0] to all lanes
    164 
    165         M_LDR       pBS, ppBS
    166         M_LDR       pThresholds, ppThresholds
    167 
    168         MOV         Mask_0,#0                       ;// zero word used to clear dFilt lanes
    169 
    170         ;// dMask_0 = d14
    171         ;// dMask_1 = d15
                ;// Neither mask is read again in this function.
                ;// NOTE(review): presumably consumed by the _unsafe helpers - confirm.
    172 
    173         VMOV        dMask_0, #0
    174         VMOV        dMask_1, #1
    175 
    176         ADD         step, srcdstStep, srcdstStep    ;// step = 2 rows
    177 
    178         LDR         XY,=LOOP_COUNT
    179 
    180         ;// p0-p3 - d4-d7
    181         ;// q0-q3 - d8-d11
    182 LoopY
    183 LoopX
    184         LDRH        bS10, [pBS], #2                 ;// two bS bytes for this 8-pixel column
    185         ADD         pTmp, pSrcDst, srcdstStep       ;// pTmp walks the odd rows (1, 3, 5)
    186         CMP         bS10, #0
    187         BEQ         NoFilterBS0                     ;// both bS bytes are 0: nothing to filter
    188 
                ;// Load rows 0..7 alternating between the two pointers; the absolute
                ;// differences are interleaved with the loads.
    189         VLD1        dP_3, [pSrcDst], step           ;// row 0
    190         VLD1        dP_2, [pTmp], step              ;// row 1
    191         VLD1        dP_1, [pSrcDst], step           ;// row 2
    192         VLD1        dP_0, [pTmp], step              ;// row 3
    193         VLD1        dQ_0, [pSrcDst], step           ;// row 4
    194         VABD        dAp1p0, dP_0, dP_1              ;// |p0 - p1|
    195         VLD1        dQ_1, [pTmp]                    ;// row 5
    196         VABD        dAp0q0, dQ_0, dP_0              ;// |q0 - p0|
    197         VLD1        dQ_2, [pSrcDst], srcdstStep     ;// row 6, pSrcDst now at row 7
    198 
    199         VABD        dAq1q0, dQ_1, dQ_0              ;// |q1 - q0|
    200         VABD        dAp2p0, dP_2, dP_0              ;// |p2 - p0|
    201         VCGT        dFilt, dAlpha, dAp0q0           ;// alpha > |p0 - q0|
    202 
    203         TST         bS10, #0xff                     ;// low bS byte; used by VMOVEQ below
    204         VMAX        dAp1p0, dAq1q0, dAp1p0          ;// max(|q1 - q0|, |p1 - p0|)
    205         VABD        dAq2q0, dQ_2, dQ_0              ;// |q2 - q0|
    206 
    207         VMOVEQ.U32  dFilt[0], Mask_0                ;// low bS byte == 0: clear bytes 0-3
    208         TST         bS10, #0xff00                   ;// high bS byte; used by VMOVEQ below
    209 
    210         VCGT        dAp2p0, dBeta, dAp2p0           ;// beta > |p2 - p0|
    211         VCGT        dAp1p0, dBeta, dAp1p0           ;// beta > both |q1 - q0| and |p1 - p0|
    212 
    213         VMOVEQ.U32  dFilt[1], Mask_0                ;// high bS byte == 0: clear bytes 4-7
    214 
    215         VCGT        dAq2q0, dBeta, dAq2q0           ;// beta > |q2 - q0|
    216         VLD1        dQ_3, [pSrcDst]                 ;// row 7, no writeback
    217         VAND        dFilt, dFilt, dAp1p0            ;// last read of D12 as dAp1p0
    218         TST         bS10, #4                        ;// bit 2 of the low bS byte, used by BNE below
                ;// NOTE(review): only the low bS byte selects the strong filter; presumably
                ;// both bytes of a pair are expected to agree on bS == 4 - confirm with caller.
    219 
                ;// Order matters: dApflg shares D17 with dAq2q0, so dAq2q0 must be read
                ;// (first VAND) before dApflg is written (second VAND).
    220         VAND        dAqflg, dFilt, dAq2q0
    221         VAND        dApflg, dFilt, dAp2p0
    222 
    223         BNE         bSGE4
    224 bSLT4
    225         ;// bS < 4 Filtering
                ;// Rewind pSrcDst from row 7 to row 2 (p1), the first row stored below.
    226         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
    227         SUB         pSrcDst, pSrcDst, srcdstStep
    228 
                ;// NOTE(review): pThresholds is not advanced on this path here; presumably
                ;// the helper reads and advances it - confirm in the helper's source.
    229         BL          armVCM4P10_DeblockingLumabSLT4_unsafe
    230 
    231         ;// Result Storage: rows 2..5 (p1, p0, q0, q1)
    232         VST1        dP_1n, [pSrcDst], srcdstStep
    233         VST1        dP_0n, [pSrcDst], srcdstStep
    234         SUB         pTmp, pSrcDst, srcdstStep, LSL #2   ;// pSrcDst is at row 4: pTmp = row 0
    235         VST1        dQ_0n, [pSrcDst], srcdstStep
    236         ADDS        XY, XY, XY                      ;// C ends LoopX, Z ends LoopY
    237         VST1        dQ_1n, [pSrcDst]
    238         ADD         pSrcDst, pTmp, #8               ;// row 0 of the next 8-byte column
    239 
    240         BCC         LoopX
    241         B           ExitLoopY
    242 
    243 NoFilterBS0
                ;// Nothing was loaded, so pSrcDst is still at row 0 of this column.
    244         ADD         pSrcDst, pSrcDst, #8            ;// next 8-byte column
    245         ADDS        XY, XY, XY                      ;// C ends LoopX, Z ends LoopY
    246         ADD         pThresholds, pThresholds, #2    ;// skip this column's two bytes
    247         BCC         LoopX
    248         B           ExitLoopY
    249 bSGE4
    250         ;// bS >= 4 Filtering
                ;// Rewind pSrcDst from row 7 to row 1 (p2), the first row stored below.
    251         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
    252         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
    253         BL          armVCM4P10_DeblockingLumabSGE4_unsafe
    254 
    255         ;// Result Storage: rows 1..6 (p2, p1, p0, q0, q1, q2)
    256         VST1        dP_2n, [pSrcDst], srcdstStep
    257         VST1        dP_1n, [pSrcDst], srcdstStep
    258         VST1        dP_0n, [pSrcDst], srcdstStep
    259         SUB         pTmp, pSrcDst, srcdstStep, LSL #2   ;// pSrcDst is at row 4: pTmp = row 0
    260         VST1        dQ_0n, [pSrcDst], srcdstStep
    261         ADDS        XY,XY,XY                        ;// C ends LoopX, Z ends LoopY
    262         VST1        dQ_1n, [pSrcDst], srcdstStep
    263         ADD         pThresholds, pThresholds, #2    ;// skip this column's two bytes
    264         VST1        dQ_2n, [pSrcDst]
    265 
    266         ADD         pSrcDst, pTmp, #8               ;// row 0 of the next 8-byte column
    267         BCC         LoopX
    268 
    269 ExitLoopY
    270 
                ;// Nothing between the ADDS above and the BNE below writes the flags, so
                ;// BNE still tests whether XY became zero.
    271         SUB         pSrcDst, pSrcDst, #16           ;// undo the two +8 column advances
    272         VLD1        {dAlpha[]}, [pAlpha_1]          ;// the second alpha byte from now on
    273         ADD         pSrcDst, pSrcDst, srcdstStep, LSL #2    ;// next edge is 4 rows down
    274         VLD1        {dBeta[]}, [pBeta_1]            ;// the second beta byte from now on
    275         BNE         LoopY
    276 
    277         MOV         r0, #OMX_Sts_NoErr
    278 
    279         M_END
    280 
    281     ENDIF
    282 
    283 
    284 
    285 
    286         END
    287 
    288 
    289