Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   12290
     21 ;// Date:       Wednesday, April 9, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         M_VARIANTS CortexA8
     31 
     32         IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
     33         IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
     34 
     35         IF CortexA8
                ;// Everything from here to the ENDIF after M_END is assembled only when
                ;// CortexA8 is true (see the M_VARIANTS line above).
     36 
        ;// Loop control word loaded into XY. The function doubles XY with ADDS once
        ;// per 8-column block: carry clear branches back to LoopX, carry set falls
        ;// out to ExitLoopY, and a zero result ends LoopY. Starting from 0x55000000
        ;// the carry is set on every second doubling and the value becomes zero on
        ;// the eighth, giving 2 LoopX passes per LoopY pass and 4 LoopY passes.
     37 LOOP_COUNT  EQU 0x55000000
     38 
     39 
     40 ;// Function arguments
     41 
     42 pSrcDst     RN 0            ;// r0: pixel pointer; moved up 4 rows on entry, then walked row by row
     43 srcdstStep  RN 1            ;// r1: byte offset used to move from one row to the next
     44 pAlpha      RN 2            ;// r2: two bytes are read from here ([0] and [1])
     45 pBeta       RN 3            ;// r3: two bytes are read from here ([0] and [1])
     46 
     47 pThresholds RN 5            ;// loaded from the ppThresholds stack argument
     48 pBS         RN 4            ;// loaded from the ppBS stack argument; read one halfword per block
     49 bS10        RN 12           ;// halfword from pBS: low byte gates dFilt lane 0, high byte lane 1
     50 
     51 pAlpha_0    RN 2            ;// same register as pAlpha
     52 pBeta_0     RN 3            ;// same register as pBeta
     53 
     54 pAlpha_1    RN 7            ;// pAlpha_0 + 1
     55 pBeta_1     RN 8            ;// pBeta_0 + 1
     56 
     57 
     58 
     59 ;// Loop
     60 
     61 XY          RN 9            ;// loop control word, initialised from LOOP_COUNT
     62 
     63 pTmp        RN 6            ;// second row pointer (pSrcDst + srcdstStep) used alongside pSrcDst
     64 step        RN 10           ;// 2 * srcdstStep, the post-increment used while loading rows
     65 
     66 ;// Pixels
        ;// Each D register holds 8 bytes of one row. Relative to the address passed
        ;// in r0, dP_3..dP_0 are loaded from the 4th..1st row before it and
        ;// dQ_0..dQ_3 from that row and the three rows after it.
     67 dP_0        DN D4.U8
     68 dP_1        DN D5.U8
     69 dP_2        DN D6.U8
     70 dP_3        DN D7.U8
     71 dQ_0        DN D8.U8
     72 dQ_1        DN D9.U8
     73 dQ_2        DN D10.U8
     74 dQ_3        DN D11.U8
     75 
     76 
     77 ;// Filtering Decision
     78 dAlpha      DN D0.U8        ;// alpha byte replicated to every lane
     79 dBeta       DN D2.U8        ;// beta byte replicated to every lane
     80 
     81 dFilt       DN D16.U8       ;// per-pixel filter-enable mask
     82 dAqflg      DN D12.U8       ;// shares D12 with dAp1p0: written only after dAp1p0 has been consumed
     83 dApflg      DN D17.U8       ;// shares D17 with dAq2q0: written only after dAq2q0 has been consumed
     84 
     85 dAp0q0      DN D13.U8       ;// |q0 - p0|
     86 dAp1p0      DN D12.U8       ;// |p0 - p1|, later max(|q1 - q0|, |p0 - p1|), later its compare with beta
     87 dAq1q0      DN D18.U8       ;// |q1 - q0|
     88 dAp2p0      DN D19.U8       ;// |p2 - p0|, later its compare with beta
     89 dAq2q0      DN D17.U8       ;// |q2 - q0|, later its compare with beta
     90 
        ;// Of the aliases from here to the end of the declarations, only dMask_0,
        ;// dMask_1, Mask_0, dP_0n, dP_1n, dP_2n, dQ_0n, dQ_1n and dQ_2n are used by
        ;// instructions in this file.
        ;// NOTE(review): the remaining names presumably mirror the register usage of
        ;// the two imported *_unsafe helpers - confirm against their sources.
     91 ;// bSLT4
     92 dTC0        DN D18.U8       ;// D18 is also dAq1q0
     93 dTC1        DN D19.U8       ;// D19 is also dAp2p0
     94 dTC01       DN D18.U8
     95 
     96 dTCs        DN D31.S8
     97 dTC         DN D31.U8       ;// D31 is also dP_2n
     98 
     99 dMask_0     DN D14.U8       ;// set to 0 in every byte by the function
    100 dMask_1     DN D15.U8       ;// set to 1 in every byte by the function
    101 
    102 Mask_0      RN 11           ;// core register holding 0, moved into dFilt lanes when a bS byte is 0
    103 
    104 dTemp       DN D19.U8
    105 
    106 ;// Computing P0,Q0
    107 qDq0p0      QN Q10.S16
    108 qDp1q1      QN Q11.S16
    109 qDelta      QN Q10.S16  ; reuse qDq0p0
    110 dDelta      DN D20.S8
    111 
    112 
    113 ;// Computing P1,Q1
    114 dRp0q0      DN D24.U8
    115 
    116 dMaxP       DN D23.U8
    117 dMinP       DN D22.U8
    118 
    119 dMaxQ       DN D19.U8
    120 dMinQ       DN D21.U8
    121 
    122 dDeltaP     DN D26.U8
    123 dDeltaQ     DN D27.U8
    124 
    125 qP_0n       QN Q14.S16
    126 qQ_0n       QN Q12.S16
    127 
        ;// Registers stored back to memory after the bS < 4 helper call.
    128 dQ_0n       DN D24.U8
    129 dQ_1n       DN D25.U8
    130 dP_0n       DN D29.U8
    131 dP_1n       DN D30.U8
    132 
    133 ;// bSGE4
    134 
    135 qSp0q0      QN Q10.U16
    136 
    137 qSp2q1      QN Q11.U16
    138 qSp0q0p1    QN Q12.U16
    139 qSp3p2      QN Q13.U16
    140 dHSp0q1     DN D28.U8
    141 
    142 qSq2p1      QN Q11.U16
    143 qSp0q0q1    QN Q12.U16
    144 qSq3q2      QN Q13.U16  ;!!
    145 dHSq0p1     DN D28.U8   ;!!
    146 
    147 qTemp1      QN Q11.U16  ;!!;qSp2q1
    148 qTemp2      QN Q12.U16  ;!!;qSp0q0p1
    149 
    150 dP_0t       DN D28.U8   ;!!;dHSp0q1
    151 dQ_0t       DN D22.U8   ;!!;Temp1
    152 
        ;// Registers stored back to memory after the bS >= 4 helper call. dP_0n,
        ;// dP_1n, dQ_0n and dQ_1n repeat the earlier definitions with the same
        ;// register numbers.
    153 dP_0n       DN D29.U8
    154 dP_1n       DN D30.U8
    155 dP_2n       DN D31.U8
    156 
    157 dQ_0n       DN D24.U8   ;!!;Temp2
    158 dQ_1n       DN D25.U8   ;!!;Temp2
    159 dQ_2n       DN D28.U8   ;!!;dQ_0t  (D28 is the register named dP_0t/dHSp0q1/dHSq0p1 above; dQ_0t is D22)
    160 
    161 
    162         ;// Function header
                ;//
                ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
                ;//
                ;// Filters across four horizontal edges of a 16-column luma area. The
                ;// first edge lies immediately above the row addressed by pSrcDst and
                ;// each later edge is 4 * srcdstStep bytes further on. Every edge is
                ;// handled as two 8-column blocks; for each block the four rows before
                ;// the edge (p3..p0) and the four rows after it (q0..q3) are loaded.
                ;//
                ;// In:   r0      pSrcDst      row just below the first edge
                ;//       r1      srcdstStep   byte distance between rows
                ;//       r2      pAlpha       2 bytes: [0] for the first edge, [1] for the rest
                ;//       r3      pBeta        2 bytes: [0] for the first edge, [1] for the rest
                ;//       stack   ppThresholds pointer value loaded into pThresholds
                ;//       stack   ppBS         pointer value loaded into pBS; 2 bytes are read
                ;//                            per block (16 bytes over the whole call)
                ;// Out:  r0 = OMX_Sts_NoErr on every path; this routine performs no
                ;//       argument validation of its own.
                ;//
                ;// The pixel arithmetic is done by two imported helpers reached with BL.
                ;// This routine expects the results in dP_1n, dP_0n, dQ_0n, dQ_1n (and
                ;// dP_2n, dQ_2n for bS >= 4) and relies on pSrcDst, srcdstStep, step,
                ;// pBS, pThresholds, XY, pAlpha_1, pBeta_1, Mask_0, dAlpha and dBeta
                ;// still being usable after the call.
                ;// NOTE(review): the helpers' actual inputs, outputs and clobbers are
                ;// not visible here - confirm against their sources.
                ;// NOTE(review): r11 and d15 presumably tell M_START which registers to
                ;// preserve - confirm in armCOMM_s.h.
    163         M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15
    164 
    165         ;//Arguments on the stack
    166         M_ARG   ppThresholds, 4
    167         M_ARG   ppBS, 4
    168 
    169         ;// d0-dAlpha_0
    170         ;// d2-dBeta_0
    171 
    172         ADD         pAlpha_1, pAlpha_0, #1              ;// address of the second alpha byte
    173         ADD         pBeta_1, pBeta_0, #1                ;// address of the second beta byte
    174 
    175         VLD1        {dAlpha[]}, [pAlpha_0]              ;// replicate alpha[0] into all lanes
    176         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// back up 4 rows to the p3 row
    177         VLD1        {dBeta[]}, [pBeta_0]                ;// replicate beta[0] into all lanes
    178 
                ;// Fetch the two pointer arguments that were passed on the stack.
    179         M_LDR       pBS, ppBS
    180         M_LDR       pThresholds, ppThresholds
    181 
    182         MOV         Mask_0,#0                           ;// zero source for the VMOVEQ lane clears below
    183 
    184         ;dMask_0-14
    185         ;dMask_1-15
                ;// dMask_0/dMask_1 are not read again in this file.
                ;// NOTE(review): presumably constants consumed by the helpers - confirm.
    186 
    187         VMOV        dMask_0, #0
    188         VMOV        dMask_1, #1
    189 
    190         ADD         step, srcdstStep, srcdstStep        ;// step = 2 rows, pSrcDst and pTmp leapfrog each other
    191 
    192         LDR         XY,=LOOP_COUNT                      ;// 2 blocks per edge, 4 edges (see LOOP_COUNT)
    193 
    194         ;// p0-p3 - d4-d7
    195         ;// q0-q3 - d8-d11
    196 LoopY                                                   ;// one pass per edge; pSrcDst is at the p3 row
    197 LoopX                                                   ;// one pass per 8-column block
    198         LDRH        bS10, [pBS], #2                     ;// two bS bytes for this block
    199         ADD         pTmp, pSrcDst, srcdstStep           ;// pTmp = p2 row
    200         CMP         bS10, #0
    201         BEQ         NoFilterBS0                         ;// both bS bytes zero: nothing to load or store
    202 
                ;// Load the eight rows, alternating pointers, with the first
                ;// absolute differences interleaved between the loads.
    203         VLD1        dP_3, [pSrcDst], step
    204         VLD1        dP_2, [pTmp], step
    205         VLD1        dP_1, [pSrcDst], step
    206         VLD1        dP_0, [pTmp], step
    207         VLD1        dQ_0, [pSrcDst], step
    208         VABD        dAp1p0, dP_0, dP_1                  ;// |p0 - p1|
    209         VLD1        dQ_1, [pTmp]
    210         VABD        dAp0q0, dQ_0, dP_0                  ;// |q0 - p0|
    211         VLD1        dQ_2, [pSrcDst], srcdstStep         ;// leaves pSrcDst on the q3 row
    212 
    213         VABD        dAq1q0, dQ_1, dQ_0                  ;// |q1 - q0|
    214         VABD        dAp2p0, dP_2, dP_0                  ;// |p2 - p0|
    215         VCGT        dFilt, dAlpha, dAp0q0               ;// alpha > |q0 - p0|
    216 
    217         TST         bS10, #0xff                         ;// Z set when the low bS byte is 0
    218         VMAX        dAp1p0, dAq1q0, dAp1p0              ;// max(|q1 - q0|, |p0 - p1|)
    219         VABD        dAq2q0, dQ_2, dQ_0                  ;// |q2 - q0|
    220 
    221         VMOVEQ.U32  dFilt[0], Mask_0                    ;// low bS byte 0: disable 32-bit lane 0
    222         TST         bS10, #0xff00                       ;// Z set when the high bS byte is 0
    223 
    224         VCGT        dAp2p0, dBeta, dAp2p0               ;// beta > |p2 - p0|
    225         VCGT        dAp1p0, dBeta, dAp1p0               ;// beta > max(|q1 - q0|, |p0 - p1|)
    226 
    227         VMOVEQ.U32  dFilt[1], Mask_0                    ;// high bS byte 0: disable 32-bit lane 1
    228 
    229         VCGT        dAq2q0, dBeta, dAq2q0               ;// beta > |q2 - q0|
    230         VLD1        dQ_3, [pSrcDst]                     ;// no post-increment: pSrcDst stays on the q3 row
    231         VAND        dFilt, dFilt, dAp1p0                ;// final filter-enable mask
    232         TST         bS10, #4                            ;// bit 2 of the low bS byte; flags held until BNE
    233 
                ;// Order matters: dAqflg overwrites D12 (dAp1p0, already consumed)
                ;// and dApflg overwrites D17 (dAq2q0, consumed by the line before).
    234         VAND        dAqflg, dFilt, dAq2q0
    235         VAND        dApflg, dFilt, dAp2p0
    236 
    237         BNE         bSGE4                               ;// bit 2 set: take the bS >= 4 path
    238 bSLT4
    239         ;// bS < 4 Filtering
                ;// Move pSrcDst from the q3 row back 5 rows to the p1 row.
    240         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
    241         SUB         pSrcDst, pSrcDst, srcdstStep
    242 
                ;// NOTE(review): pThresholds is not advanced on this path in this
                ;// file, unlike the other two paths; presumably the helper reads
                ;// and advances it - confirm.
    243         BL          armVCM4P10_DeblockingLumabSLT4_unsafe
    244 
    245         ;// Result Storage
                ;// Writes rows p1, p0, q0, q1.
    246         VST1        dP_1n, [pSrcDst], srcdstStep
    247         VST1        dP_0n, [pSrcDst], srcdstStep
    248         SUB         pTmp, pSrcDst, srcdstStep, LSL #2   ;// pSrcDst is on q0 here, so pTmp = p3 row
    249         VST1        dQ_0n, [pSrcDst], srcdstStep
    250         ADDS        XY, XY, XY                          ;// C decides LoopX, Z decides LoopY (at ExitLoopY)
    251         VST1        dQ_1n, [pSrcDst]
    252         ADD         pSrcDst, pTmp, #8                   ;// p3 row of the next 8 columns
    253 
    254         BCC         LoopX
    255         B           ExitLoopY
    256 
    257 NoFilterBS0
                ;// Nothing was loaded, so pSrcDst is still on the p3 row.
    258         ADD         pSrcDst, pSrcDst, #8                ;// next 8 columns
    259         ADDS        XY, XY, XY                          ;// C decides LoopX, Z decides LoopY (at ExitLoopY)
    260         ADD         pThresholds, pThresholds, #2        ;// skip 2 bytes of pThresholds for this block
    261         BCC         LoopX
    262         B           ExitLoopY
    263 bSGE4
    264         ;// bS >= 4 Filtering
                ;// Move pSrcDst from the q3 row back 6 rows to the p2 row.
    265         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
    266         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
    267         BL          armVCM4P10_DeblockingLumabSGE4_unsafe
    268 
    269         ;// Result Storage
                ;// Writes rows p2, p1, p0, q0, q1, q2.
    270         VST1        dP_2n, [pSrcDst], srcdstStep
    271         VST1        dP_1n, [pSrcDst], srcdstStep
    272         VST1        dP_0n, [pSrcDst], srcdstStep
    273         SUB         pTmp, pSrcDst, srcdstStep, LSL #2   ;// pSrcDst is on q0 here, so pTmp = p3 row
    274         VST1        dQ_0n, [pSrcDst], srcdstStep
    275         ADDS        XY,XY,XY                            ;// C decides LoopX, Z decides LoopY (at ExitLoopY)
    276         VST1        dQ_1n, [pSrcDst], srcdstStep
    277         ADD         pThresholds, pThresholds, #2        ;// skip 2 bytes of pThresholds for this block
    278         VST1        dQ_2n, [pSrcDst]
    279 
    280         ADD         pSrcDst, pTmp, #8                   ;// p3 row of the next 8 columns
    281         BCC         LoopX
    282 
    283 ExitLoopY
                ;// Reached with the flags of the last ADDS XY still intact: none
                ;// of the instructions between that ADDS and the BNE below sets
                ;// flags.
    284 
    285         SUB         pSrcDst, pSrcDst, #16               ;// undo the two 8-column advances
    286         VLD1        {dAlpha[]}, [pAlpha_1]              ;// every later edge uses alpha[1]
    287         ADD         pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// p3 row of the next edge, 4 rows further on
    288         VLD1        {dBeta[]}, [pBeta_1]                ;// every later edge uses beta[1]
    289         BNE         LoopY                               ;// XY not yet zero: more edges to do
    290 
    291         MOV         r0, #OMX_Sts_NoErr
    292 
    293         M_END
    294 
    295     ENDIF
    296 
    297 
    298 
    299 
    300         END
    301 
    302 
    303