Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  armVCM4P10_DeblockingChroma_unsafe_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   12290
     21 ;// Date:       Wednesday, April 9, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         M_VARIANTS CortexA8
     31 
     32 
     33     IF  CortexA8
     34 
     35 pAlpha      RN 2            ;// r2: address the alpha byte is loaded from
     36 pBeta       RN 3            ;// r3: address the beta byte is loaded from
     37 
     38 pThresholds RN 5            ;// r5: threshold pointer; both routines below advance it by 4
     39 pBS         RN 4            ;// not referenced by the code visible in this file
     40 bS3210      RN 6            ;// not referenced by the code visible in this file
     41 
     42 ;// Pixels (only P_0, P_1, Q_0 and Q_1 are read by the code visible in this file)
     43 dP_0        DN D4.U8
     44 dP_1        DN D5.U8
     45 dP_2        DN D6.U8
     46 dP_3        DN D7.U8
     47 dQ_0        DN D8.U8
     48 dQ_1        DN D9.U8
     49 dQ_2        DN D10.U8
     50 dQ_3        DN D11.U8
     51 
     52 
     53 ;// Filtering Decision
     54 dAlpha      DN D0.U8        ;// filled by VLD1 {dAlpha[]} in both routines
     55 dBeta       DN D2.U8        ;// filled by VLD1 {dBeta[]} in both routines
     56 
     57 dFilt       DN D16.U8       ;// select mask for the VBIF in the bSLT4 routine
     58 dAqflg      DN D12.U8       ;// not referenced by the code visible in this file
     59 dApflg      DN D17.U8       ;// not referenced by the code visible in this file; same register as dDummy
     60 
     61 dAp0q0      DN D13.U8       ;// same register as dHSp0q1 / dP_0t
     62 
     63 ;// bSLT4
     64 dTC3210     DN D18.U8
     65 dTCs        DN D31.S8       ;// signed view of D31
     66 dTC         DN D31.U8       ;// unsigned view of the same D31
     67 
     68 dMask_0     DN D14.U8       ;// NOTE(review): presumably preset to 0 in every byte by the caller - confirm
     69 dMask_1     DN D15.U8       ;// NOTE(review): presumably preset to 1 in every byte by the caller - confirm
     70 dMask_4     DN D26.U16      ;// not referenced by the code visible in this file
     71 
     72 dTemp       DN D28.U8       ;// D28 is the low half of Q14 (qP_0n) and is also named dBS3210
     73 dDummy      DN D17.U8
     74 
     75 ;// Computing P0,Q0
     76 qDq0p0      QN Q10.S16
     77 qDp1q1      QN Q11.S16
     78 qDelta      QN Q10.S16  ; reuse qDq0p0
     79 dDelta      DN D20.S8   ;// D20 is the low half of Q10, so narrowing qDelta into dDelta reuses its own storage
     80 
     81 
     82 ;// Computing P1,Q1 (the code visible in this file only uses these as 16-bit temporaries for P0/Q0)
     83 qP_0n       QN Q14.S16  ;// Q14 = D28:D29, overlaps dTemp and dP_0n
     84 qQ_0n       QN Q12.S16  ;// Q12 = D24:D25, overlaps dQ_0n
     85 
     86 dQ_0n       DN D24.U8
     87 dP_0n       DN D29.U8
     88 
     89 ;// bSGE4
     90 
     91 dHSp0q1     DN D13.U8
     92 dHSq0p1     DN D31.U8   ;// D31 again (also dTC / dTCs / dQ_0t)
     93 
     94 dBS3210     DN D28.U16  ;// not referenced by the code visible in this file
     95 
     96 dP_0t       DN D13.U8   ;dHSp0q1
     97 dQ_0t       DN D31.U8   ;Temp1
     98 
     99 dP_0n       DN D29.U8   ;// repeats the dP_0n definition above with the same register
    100 dQ_0n       DN D24.U8   ;Temp2 - repeats the dQ_0n definition above with the same register
    101 
    102 ;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe
    103 ;//
    104 ;// Inputs - Pixels read        - p0-p1: D4-D5, q0-q1: D8-D9
    105 ;//        - Filter masks       - filt: D16, dMask_0: D14, dMask_1: D15
    106 ;//        - Additional Params  - pAlpha: r2, pBeta: r3, pThresholds: r5
    107 ;//
    108 ;// Outputs - Pixels            - P0: D29, Q0: D24
    109 ;//         - Additional Params - pThresholds: r5 (post-incremented by 4),
    110 ;//                               alpha: D0, beta: D2 (loaded from pAlpha / pBeta)
    111 ;// Registers Corrupted         - D18-D31
    112 
    113 
    114         M_START armVCM4P10_DeblockingChromabSLT4_unsafe
    115         ;// Computes filtered P0 (D29) and Q0 (D24) for the eight 8-bit lanes held in D4/D5/D8/D9:
    116         ;//   delta = clip(-tC, tC, (((q0-p0)<<2) + (p1-q1) + 4) >> 3);  P0 = P0 + delta;  Q0 = Q0 - delta
    117         ;// dTC3210 - D18
    118         ;// dTemp   - D28
    119 
    120         VLD1        d18.U32[0], [pThresholds]! ;// load four threshold bytes into lane 0 of D18; '!' writes back pThresholds += 4
    121 
    122         ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
    123         ;// dDelta = ((qDp1q1 >> 2) + qDq0p0 + 1) >> 1   (equivalent form, evaluated in signed 16-bit lanes)
    124 
    125         ;// qDp1q1 - Q11 = p1 - q1
    126         ;// qDq0p0 - Q10 = q0 - p0
    127         VSUBL       qDp1q1, dP_1, dQ_1
    128         VMOV        dTemp, dTC3210
    129         VSUBL       qDq0p0, dQ_0, dP_0
    130         VSHR        qDp1q1, qDp1q1, #2
    131         VZIP.8      dTC3210, dTemp          ;// interleave D18 with its copy: bytes t0,t1,t2,t3 become t0,t0,t1,t1,t2,t2,t3,t3 in D18
    132 
    133         ;// qDelta reuses Q10 (qDq0p0)
    134 
    135         ;// dTC = dTC3210 + dMask_1, after dTC3210 is replaced by dMask_0 wherever the dFilt bits are clear
    136         ;// NOTE(review): presumably dMask_0 = 0 and dMask_1 = 1 in every byte (set up by the caller) - confirm
    137         ;// dTC3210 - D18
    138         ;// dTemp   - D28
    139         ;// dTC     - D31
    140         VBIF        dTC3210, dMask_0, dFilt
    141         VRHADD      qDelta, qDp1q1, qDq0p0
    142         VADD        dTC, dTC3210, dMask_1
    143         VQMOVN      dDelta, qDelta
    144         ;// dDelta - D20 (delta saturated to signed 8 bits)
    145 
    146         ;// dDelta = armClip(-dTC, dTC, dDelta): VMIN against +tC, negate D31 in place, then VMAX against -tC
    147         VLD1        {dAlpha[]}, [pAlpha]    ;// alpha byte replicated to every lane of D0; not read again in this routine
    148         VMIN        dDelta, dDelta, dTCs
    149         VNEG        dTCs, dTCs
    150         VLD1        {dBeta[]}, [pBeta]      ;// beta byte replicated to every lane of D2; not read again in this routine
    151         ;1
    152         VMAX        dDelta, dDelta, dTCs
    153 
    154         ;// dP_0n - D29
    155         ;// dQ_0n - D24
    156 
    157         ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
    158         ;// pQ0[0*Step]  = (OMX_U8)armClip(0, 255, dQ_0 - delta);
    159 
    160         ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
    161         ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);
    162 
    163         ;// qP_0n - Q14 (D28:D29, so dTemp is overwritten from here on)
    164         ;// qQ_0n - Q12 (D24:D25)
    165 
    166         VMOVL       qP_0n, dP_0
    167         VMOVL       qQ_0n, dQ_0
    168 
    169         ;1
    170         VADDW       qP_0n, qP_0n, dDelta
    171         VSUBW       qQ_0n, qQ_0n, dDelta
    172 
    173         VQMOVUN     dP_0n, qP_0n            ;// saturating signed-to-unsigned narrow performs the 0..255 clip
    174         VQMOVUN     dQ_0n, qQ_0n
    175 
    176         M_END
    177 
    178 ;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
    179 ;//
    180 ;// Inputs - Pixels read        - p0-p1: D4-D5, q0-q1: D8-D9
    181 ;//        - Additional Params  - pAlpha: r2, pBeta: r3, pThresholds: r5
    182 ;//
    183 ;//
    184 ;// Outputs - Pixels            - P0: D13, Q0: D31
    185 ;//         - Additional Params - pThresholds: r5 (advanced by 4), alpha: D0, beta: D2
    186 ;// Registers Corrupted         - D0, D2, D13, D31
    187 
    188         M_START armVCM4P10_DeblockingChromabSGE4_unsafe
    189         ;// Computes P0 = (2*p1 + p0 + q1 + 2) >> 2 into D13 and Q0 = (2*q1 + q0 + p1 + 2) >> 2 into D31
    190         ;// dHSq0p1 - D31 = (q0 + p1) >> 1
    191         ;// dHSp0q1 - D13 = (p0 + q1) >> 1
    192         VHADD       dHSp0q1, dP_0, dQ_1
    193         VHADD       dHSq0p1, dQ_0, dP_1
    194 
    195         ;// Reload alpha/beta and step the threshold pointer between the two halving stages
    196 
    197         ;// dHSp0q1 - D13
    198         ;// dP_0t   - D13 (overwrites dHSp0q1)
    199         ;// dHSq0p1 - D31
    200         ;// dQ_0t   - D31 (overwrites dHSq0p1)
    201         VLD1        {dAlpha[]}, [pAlpha]    ;// alpha byte replicated to every lane of D0; not read again in this routine
    202         ADD         pThresholds, pThresholds, #4    ;// same 4-byte advance as the VLD1 writeback in the bSLT4 routine; no thresholds are read here
    203         VLD1        {dBeta[]}, [pBeta]      ;// beta byte replicated to every lane of D2; not read again in this routine
    204 
    205         VRHADD      dP_0t, dHSp0q1, dP_1    ;// (((p0 + q1) >> 1) + p1 + 1) >> 1
    206         VRHADD      dQ_0t, dHSq0p1, dQ_1    ;// (((q0 + p1) >> 1) + q1 + 1) >> 1
    207 
    208         M_END
    209 
    210         ENDIF
    211 
    212         END
    213