Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   12290
     21 ;// Date:       Wednesday, April 9, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27 
     28         INCLUDE omxtypes_s.h
     29         INCLUDE armCOMM_s.h
     30 
     31         M_VARIANTS CortexA8
     32 
     33         IF CortexA8
     34 
     35         IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
     36         IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe
     37 
     38 LOOP_COUNT  EQU 0x40000000      ;// Seed for XY: XY is doubled by ADDS once per loop pass, so it becomes 0 after two passes
     39 MASK_3      EQU 0x03030303      ;// Bits 0-1 of every byte; TSTed against the packed bS word before the bSLT4 helper call
     40 MASK_4      EQU 0x04040404      ;// Bit 2 of every byte; TSTed against the packed bS word before the bSGE4 helper call
     41 
     42 ;// Function arguments
        ;// r0-r3 hold the four register arguments. ppThresholds and ppBS are declared
        ;// as stack arguments (M_ARG) inside the function and loaded into pThresholds/pBS.
     43 
     44 pSrcDst     RN 0
     45 srcdstStep  RN 1
     46 pAlpha      RN 2
     47 pBeta       RN 3
     48 
     49 pThresholds RN 5
     50 pBS         RN 4
     51 bS3210      RN 6                ;// One 32-bit word loaded from pBS per loop pass (name suggests four packed bS bytes)
     52 
     53 ;// Loop
     54 
     55 XY          RN 7                ;// Loop counter, initialised to LOOP_COUNT
     56 
     57 ;// Pixels
        ;// One D register (8 x U8) per pixel row: three rows P_2, P_1, P_0 followed by
        ;// three rows Q_0, Q_1, Q_2, loaded in that order from consecutive rows.
     58 dP_0        DN D4.U8
     59 dP_1        DN D5.U8
     60 dP_2        DN D6.U8
     61 dQ_0        DN D8.U8
     62 dQ_1        DN D9.U8
     63 dQ_2        DN D10.U8
     64 
     65 ;// Filtering Decision
     66 dAlpha      DN D0.U8            ;// One byte from pAlpha replicated to all lanes
     67 dBeta       DN D2.U8            ;// One byte from pBeta replicated to all lanes
     68 
        ;// NOTE: several names below deliberately share a physical register. The function
        ;// body relies on instruction order so that each value is consumed before the
        ;// register is reused:
        ;//   D12 : dAp1p0 and dAqflg
        ;//   D17 : dAq2q0 and dApflg
        ;//   D13 : dAp0q0 and dP_0t
        ;//   D19 : dAp2p0 and dTemp
        ;//   D26 : dBS3210 and dFilt_bs (and the low half of qBS3210 = Q13 = D26:D27)
        ;//   D27 : dMask_bs (the high half of Q13)
     69 dFilt       DN D16.U8
     70 dAqflg      DN D12.U8
     71 dApflg      DN D17.U8
     72 
     73 dAp0q0      DN D13.U8
     74 dAp1p0      DN D12.U8
     75 dAq1q0      DN D18.U8
     76 dAp2p0      DN D19.U8
     77 dAq2q0      DN D17.U8
     78 
     79 qBS3210     QN Q13.U16
     80 dBS3210     DN D26
     81 dMask_bs    DN D27
     82 dFilt_bs    DN D26.U16
     83 
     84 ;// bSLT4
        ;// dMask_0 and dMask_1 are initialised in the function but not read again in this
        ;// file; presumably they are inputs to the imported bSLT4 helper - TODO confirm.
     85 dMask_0     DN D14.U8
     86 dMask_1     DN D15.U8
     87 dMask_4     DN D1.U16
     88 
     89 Mask_4      RN 8
     90 Mask_3      RN 9
     91 
     92 dTemp       DN D19.U8           ;// Not referenced in this file; presumably scratch for the helpers - TODO confirm
     93 
     94 ;// Result
        ;// dP_0t/dQ_0t and dP_0n/dQ_0n are only merged (VBIT/VBIF) and stored in this file;
        ;// they are never computed here. Presumably the imported helpers produce them
        ;// (the merge order suggests *_0n from bSLT4 and *_0t from bSGE4) - TODO confirm.
     95 dP_0t       DN D13.U8
     96 dQ_0t       DN D31.U8
     97 
     98 dP_0n       DN D29.U8
     99 dQ_0n       DN D24.U8
    100 
    101 
    102         ;// Function header
                ;//
                ;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
                ;//
                ;// Register arguments (per the RN aliases): pSrcDst (r0), srcdstStep (r1),
                ;// pAlpha (r2), pBeta (r3). Stack arguments: ppThresholds, ppBS.
                ;// Returns OMX_Sts_NoErr in r0 on both exit paths.
                ;//
                ;// The loop body runs twice (XY starts at LOOP_COUNT and is doubled until zero).
                ;// Each pass loads six 8-byte rows starting three rows above pSrcDst, builds
                ;// per-pixel filter masks, optionally calls the two imported helpers, and
                ;// writes back the P_0 and Q_0 rows. The second pass operates four rows
                ;// below the first.
                ;//
                ;// The "r9, d15" operands are presumably the highest core/NEON registers that
                ;// M_START must preserve - confirm against armCOMM_s.h.
    103         M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15
    104 
    105         ;//Arguments on the stack
    106         M_ARG   ppThresholds, 4
    107         M_ARG   ppBS, 4
    108 
    109         ;// d0-dAlpha_0
    110         ;// d2-dBeta_0
    111 
    112         ;load alpha1,beta1 somewhere to avoid more loads
                ;// Load the first alpha/beta byte, replicate it to every lane, and post-increment
                ;// the pointers so that they address the second alpha/beta byte afterwards.
    113         VLD1        {dAlpha[]}, [pAlpha]!
                ;// The two SUBs together move pSrcDst back by 3 * srcdstStep (to the P_2 row).
    114         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1 ;?
    115         SUB         pSrcDst, pSrcDst, srcdstStep
    116         VLD1        {dBeta[]}, [pBeta]!
    117 
    118         M_LDR       pBS, ppBS
    119         M_LDR       pThresholds, ppThresholds
    120 
    121         LDR         Mask_3, =MASK_3
    122         LDR         Mask_4, =MASK_4
    123 
                ;// Constant vectors. dMask_4 is used by the VTST below; dMask_0/dMask_1 are not
                ;// read again in this file (presumably consumed by the helpers - TODO confirm).
    124         VMOV        dMask_0, #0
    125         VMOV        dMask_1, #1
    126         VMOV        dMask_4, #4
    127 
    128         LDR         XY, =LOOP_COUNT
    129 
    130         ;// p0-p3 - d4-d7
    131         ;// q0-q3 - d8-d11
    132 LoopY
                ;// Read one 32-bit bS word, then advance pBS by 8 bytes for the next pass.
    133         LDR         bS3210, [pBS], #8
    134 
                ;// Load rows P_2, P_1, P_0, Q_0, Q_1, Q_2 (pSrcDst advances six rows in total).
                ;// The CMP result is consumed by the BEQ after the loads; the interleaved
                ;// VLD1/VABD instructions do not modify the ARM condition flags.
    135         VLD1        dP_2, [pSrcDst], srcdstStep
    136         ;1
    137         VLD1        dP_1, [pSrcDst], srcdstStep
    138         CMP         bS3210, #0
    139         VLD1        dP_0, [pSrcDst], srcdstStep
    140         ;1
    141         VLD1        dQ_0, [pSrcDst], srcdstStep
    142         VABD        dAp2p0, dP_2, dP_0
    143         VLD1        dQ_1, [pSrcDst], srcdstStep
    144         VABD        dAp0q0, dP_0, dQ_0
    145         VLD1        dQ_2, [pSrcDst], srcdstStep
                ;// Whole bS word is zero: skip filtering and the write-back for this pass.
    146         BEQ         NoFilterBS0
    147 
    148         VABD        dAp1p0, dP_1, dP_0
    149         VABD        dAq1q0, dQ_1, dQ_0
    150 
                ;// dFilt = (alpha > |p0 - q0|) per byte lane.
    151         VCGT        dFilt, dAlpha, dAp0q0
                ;// Put the bS word in the low 32 bits of D26 and widen its bytes to 16-bit
                ;// lanes of Q13 (D26:D27). Only D26 (the four widened bS values) is used
                ;// afterwards; D27 is overwritten by the VCGT that builds dMask_bs.
    152         VMOV.U32    dBS3210[0], bS3210
                ;// dAp1p0 = max(|q1 - q0|, |p1 - p0|)
    153         VMAX        dAp1p0, dAq1q0, dAp1p0
    154         VMOVL       qBS3210, dBS3210.U8
    155         VABD        dAq2q0, dQ_2, dQ_0
                ;// dMask_bs = all-ones in each 16-bit lane whose bS value is > 0.
    156         VCGT        dMask_bs.S16, dBS3210.S16, #0
    157 
                ;// Turn the differences into masks: beta > max(|p1-p0|,|q1-q0|) and beta > |p2-p0|.
    158         VCGT        dAp1p0, dBeta, dAp1p0
    159         VCGT        dAp2p0, dBeta, dAp2p0
    160 
    161         VAND        dFilt, dMask_bs.U8
    162 
                ;// Flags for the BLNE below: non-zero if any bS byte has bit 0 or bit 1 set.
                ;// The NEON instructions in between leave the flags untouched.
    163         TST         bS3210, Mask_3
    164 
    165         VCGT        dAq2q0, dBeta, dAq2q0
    166         VAND        dFilt, dFilt, dAp1p0
    167 
                ;// Order matters here: dAqflg shares D12 with dAp1p0 (already folded into dFilt)
                ;// and dApflg shares D17 with dAq2q0, which must be read by the first VAND
                ;// before the second VAND overwrites it.
                ;// dAqflg/dApflg are not read again in this file (presumably helper inputs).
    168         VAND        dAqflg, dFilt, dAq2q0
    169         VAND        dApflg, dFilt, dAp2p0
    170 
    171         ;// bS < 4 Filtering
    172         BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe
    173 
                ;// Re-test after the call: non-zero if any bS byte has bit 2 set.
    174         TST         bS3210, Mask_4
    175 
                ;// Step back four rows: pSrcDst now addresses the P_0 row loaded above.
    176         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
                ;// dFilt_bs = all-ones in each 16-bit lane whose bS value has bit 2 set
                ;// (this overwrites the widened bS values in D26).
    177         VTST        dFilt_bs, dFilt_bs, dMask_4
    178 
    179         ;// bS == 4 Filtering
    180         BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe
    181 
                ;// Where dFilt_bs is set, take the *_0t result instead of the *_0n result.
    182         VBIT        dP_0n, dP_0t, dFilt_bs
    183         VBIT        dQ_0n, dQ_0t, dFilt_bs
    184 
                ;// Where dFilt is clear, restore the original unfiltered pixels.
    185         VBIF        dP_0n, dP_0, dFilt
    186         VBIF        dQ_0n, dQ_0, dFilt
    187 
    188         ;// Result Storage
                ;// Write the P_0 and Q_0 rows back. pSrcDst ends on the Q_1 row, which is the
                ;// P_2 row of the next pass (i.e. the next pass is centred four rows lower).
                ;// ADDS doubles XY and sets Z for the BNE; the VST1 after it keeps the flags.
                ;// NOTE(review): unlike NoFilterBS0, this path does not reload dAlpha/dBeta from
                ;// pAlpha/pBeta or advance pThresholds in this file. Presumably the imported
                ;// helpers take care of that - confirm against their source.
    189         VST1        dP_0n, [pSrcDst], srcdstStep
    190         ADDS        XY, XY, XY
    191         VST1        dQ_0n, [pSrcDst], srcdstStep
    192 
    193         BNE         LoopY
    194 
    195         MOV         r0, #OMX_Sts_NoErr
    196 
                ;// Early function return (macro from armCOMM_s.h).
    197         M_EXIT
    198 
    199 NoFilterBS0
                ;// Reached when the bS word for this pass is zero: nothing is stored.
    200 
                ;// Reload alpha/beta from the already-incremented pointers (no writeback), i.e.
                ;// the second alpha/beta byte, for the next pass.
    201         VLD1        {dAlpha[]}, [pAlpha]
                ;// Six rows were loaded; stepping back two leaves pSrcDst on the Q_1 row, the
                ;// same position the filtering path reaches after its two stores.
    202         SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
                ;// Double XY and set Z for the BNE; the following VLD1 and non-flag-setting
                ;// ADD leave the flags intact.
    203         ADDS        XY, XY, XY
    204         VLD1        {dBeta[]}, [pBeta]
                ;// Skip the four threshold bytes belonging to this unfiltered pass.
    205         ADD         pThresholds, pThresholds, #4
    206         BNE         LoopY
    207 
    208         MOV         r0, #OMX_Sts_NoErr
    209         M_END
    210 
    211         ENDIF
    212 
    213 
    214         END
    215 
    216 
    217