Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27 
     28         INCLUDE omxtypes_s.h
     29         INCLUDE armCOMM_s.h
     30 
     31         M_VARIANTS ARM1136JS
     32 
     33 
     34         IF ARM1136JS
     35 
     36 MASK_0      EQU 0x00000000              ;// all-zero word; moved into m00 and used as the zero operand of SEL/SSUB8
     37 MASK_1      EQU 0x01010101              ;// 0x01 in every byte lane; MUL by it copies a byte value into all four lanes
     38 LOOP_COUNT  EQU 0x50000000              ;// initial XY; XY is doubled (ADDS) after every pass: carry-out ends passes 2 and 4, a zero result ends the function
     39 
     40 ;// Declare input registers (several aliases below name the same physical register; each is only valid until another alias of that register is written)
     41 
     42 pSrcDst     RN 0                        ;// r0 on entry
     43 srcdstStep  RN 1                        ;// r1 on entry; used as the row-to-row byte offset
     44 pAlphaArg   RN 2                        ;// r2 on entry; bytes [0] and [1] are read
     45 pBetaArg    RN 3                        ;// r3 on entry; bytes [0] and [1] are read
     46 
     47 pThresholds RN 6                        ;// loaded from a stack argument; shares r6 with alpha/P_0/t4, so it is kept in the ppThresholds stack slot
     48 pBS         RN 9                        ;// loaded from a stack argument; shares r9 with beta1/q_1/neg, so it is kept in the ppBS stack slot
     49 pQ0         RN 0                        ;// same register as pSrcDst; the running pixel pointer
     50 bS          RN 10                       ;// halfword read through pBS (two bS bytes); shares r10 with a/t1
     51 
     52 alpha       RN 6                        ;// alpha replicated in four lanes, for the edge being filtered
     53 alpha0      RN 6                        ;// built from pAlphaArg[0]
     54 alpha1      RN 8                        ;// built from pAlphaArg[1]
     55 
     56 beta        RN 7                        ;// beta replicated in four lanes, for the edge being filtered
     57 beta0       RN 7                        ;// built from pBetaArg[0]
     58 beta1       RN 9                        ;// built from pBetaArg[1]
     59 
     60 ;// Declare Local/Temporary variables
     61 
     62 ;// Pixels (each register holds four horizontally adjacent bytes of one row)
     63 p_0         RN 3                        ;// row just above the edge (reuses pBetaArg's register)
     64 p_1         RN 5                        ;// row two above the edge
     65 q_0         RN 8                        ;// row just below the edge
     66 q_1         RN 9                        ;// row two below the edge
     67 
     68 ;// Filtering
     69 
     70 dp0q0       RN 12                       ;// the three d* names are one scratch register (r12)
     71 dp1p0       RN 12
     72 dq1q0       RN 12
     73 
     74 ap0q0       RN 4                        ;// per-lane |p0 - q0|
     75 filt        RN 2                        ;// per-lane flag: 0x01 = filter this lane, 0x00 = keep the original pixel (reuses pAlphaArg's register)
     76 
     77 m00         RN 14                       ;// holds MASK_0 (r14 is used as a general register here)
     78 m01         RN 11                       ;// holds MASK_1
     79 
     80 pQ0         RN 0                        ;// second declaration of pQ0, same value as above
     81 Step        RN 1                        ;// alias of srcdstStep; not referenced in the code visible in this file
     82 
     83 ;// Output
     84 
     85 P_0         RN 6                        ;// new p0 row (overwrites alpha/pThresholds)
     86 Q_0         RN 7                        ;// new q0 row (overwrites beta)
     87 
     88 ;//Declarations for bSLT4 kernel
     89 
     90 tC          RN 12                       ;// per-lane clipping limit
     91 tC0         RN 5                        ;// first threshold byte of the pass
     92 tC1         RN 12                       ;// second threshold byte of the pass (same register as tC and t2)
     93 pos         RN 5                        ;// positive part of the filter delta
     94 neg         RN 9                        ;// negative part of the filter delta (as a magnitude)
     95 
     96 ;//Declarations for bSGE4 kernel (none: it only uses the temporaries below)
     97 
     98 
     99 ;// Miscellaneous
    100 XY          RN 8                        ;// loop state, see LOOP_COUNT; shares r8 with q_0/alpha1, so it is kept in the pXYBS stack slot
    101 
    102 a           RN 10                       ;// scratch; writing it destroys bS
    103 t1          RN 10
    104 t2          RN 12
    105 t3          RN 14                       ;// same register as m00
    106 t4          RN 6                        ;// same register as pThresholds/alpha/P_0
    107 t5          RN 5                        ;// same register as p_1/tC0/pos
    108 ;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I: in-place deblocking of two horizontal edges, each 8 bytes wide: the edge at pSrcDst and the edge 4*srcdstStep below it.
    109 ;// In: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlphaArg, r3 = pBetaArg, plus two pointers on the stack (thresholds, bS). Out: r0 = OMX_Sts_NoErr. Only the row directly above (p0) and directly below (q0) each edge is written.
    110         ;// Allocate stack memory (the slots exist because r6/r8/r9 are shared between several aliases and must be spilled)
    111         M_ALLOC4 ppThresholds,4         ;// saved thresholds pointer; advanced by 2 on every pass
    112         M_ALLOC8 pAlphaBeta0,8          ;// alpha/beta (lane-replicated) for the edge currently being filtered
    113         M_ALLOC8 pAlphaBeta1,8          ;// alpha/beta built from index 1; copied into pAlphaBeta0 at ExitLoopY
    114         M_ALLOC8 pXYBS,4                ;// loop state XY; note the M_STRD/M_LDRD on this slot transfer 8 bytes (XY and pBS)
    115         M_ALLOC4 ppBS,4                 ;// saved pBS. NOTE(review): presumably laid out directly after pXYBS so that "M_LDRD XY, pBS, pXYBS" reads what "M_STR pBS, ppBS" wrote - confirm against M_ALLOC in armCOMM_s.h
    116 
    117         ;// Function header
    118         M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11        ;// NOTE(review): r11 is presumably the highest register the macro preserves (r12/r14 are also written below) - confirm in armCOMM_s.h
    119 
    120         ;//Input arguments on the stack
    121         M_ARG   ppThresholdsArg, 4      ;// holds the thresholds pointer
    122         M_ARG   ppBSArg, 4              ;// holds the bS pointer
    123 ;//---------------Set-up: replicate alpha/beta into byte lanes and spill them-------------------
    124         LDRB    alpha1, [pAlphaArg,#1]
    125         LDRB    beta1,  [pBetaArg,#1]
    126         M_LDR   pThresholds, ppThresholdsArg
    127         LDR     a,=MASK_1               ;// lane replicator for the MULs below
    128         LDRB    beta0,  [pBetaArg]
    129         M_STR   pThresholds, ppThresholds       ;// spill now: the next load reuses r6 for alpha0
    130         LDRB    alpha0, [pAlphaArg]
    131 
    132         MUL     alpha1, alpha1, a       ;// byte * 0x01010101 = the byte in all four lanes
    133         MUL     beta1, beta1, a
    134         MUL     alpha0, alpha0, a
    135         MUL     beta0, beta0, a
    136 
    137         M_STRD  alpha1, beta1, pAlphaBeta1      ;// frees r8/r9 for XY/pBS
    138         M_LDR   pBS, ppBSArg
    139         M_STRD  alpha0, beta0, pAlphaBeta0      ;// r6/r7 keep alpha0/beta0 for the first pass
    140 
    141         LDR     XY,=LOOP_COUNT
    142         M_STRD  XY, pBS, pXYBS          ;// 8-byte store: XY, then pBS in the following word
    143 
    144         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back up two rows: pQ0 now addresses the p1 row
    145 LoopY
    146 LoopX
    147 ;//---------------Load Pixels------------------- (LoopY and LoopX are the same address; one pass handles four pixels)
    148         LDRH    bS, [pBS], #2           ;// two bS bytes: low byte governs lanes 0-1, high byte lanes 2-3
    149 
    150         M_STR   pBS, ppBS               ;// spill: r9 is reused for q_1 below
    151         M_LDR   p_1, [pQ0],srcdstStep   ;// each load steps one row down
    152 
    153         CMP     bS, #0                  ;// result is used by BEQ NoFilterBS0 after the remaining loads
    154 
    155         M_LDR   p_0, [pQ0],srcdstStep
    156         M_LDR   q_0, [pQ0],srcdstStep
    157         M_LDR   q_1, [pQ0]              ;// no post-increment: pQ0 stays on the q1 row
    158         LDR     m01, =MASK_1                ;//  01010101 mask
    159         BEQ     NoFilterBS0             ;// both bS bytes are zero: leave these four pixels untouched
    160 
    161 
    162         ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
    163         ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
    164         ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
    165         ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
    166 
    167 ;//--------------Filtering Decision ------------------- (builds filt: 0x01 in every lane that must be filtered)
    168         MOV     m00, #MASK_0                ;//  00000000 mask
    169 
    170         MOV     filt, m01               ;// start with all four lanes enabled
    171         TST     bS, #0xff00
    172         MOVEQ   filt, filt, LSR #16     ;// high bS byte is zero: disable lanes 2-3
    173         TST     bS, #0xff
    174         MOVEQ   filt, filt, LSL #16     ;// low bS byte is zero: disable lanes 0-1
    175         TST     bS, #4                  ;// kernel choice for all four lanes, from bit 2 of the low bS byte; consumed by BEQ bSLT4 below (the USUB8/SEL in between are relied on to leave Z alone)
    176 
    177 
    178         ;// Check |p0-q0|<Alpha  (pattern: two USUB8 + SEL give the absolute difference; USUB8 against the limit sets GE where diff >= limit; SEL then zeroes filt there)
    179         USUB8   dp0q0, p_0, q_0
    180         USUB8   a, q_0, p_0             ;// GE set in lanes where q0 >= p0 (this write ends the life of bS)
    181         SEL     ap0q0, a, dp0q0         ;// ap0q0 = |p0 - q0| per lane
    182         USUB8   a, ap0q0, alpha         ;// GE set in lanes where |p0 - q0| >= alpha
    183         SEL     filt, m00, filt         ;// disable those lanes
    184 
    185         ;// Check |p1-p0|<Beta
    186         USUB8   dp1p0, p_1, p_0
    187         USUB8   a, p_0, p_1
    188         SEL     a, a, dp1p0             ;// a = |p1 - p0| per lane
    189         USUB8   a, a, beta
    190         SEL     filt, m00, filt
    191 
    192         ;// Check |q1-q0|<Beta
    193         USUB8   dq1q0, q_1, q_0
    194         USUB8   a, q_0, q_1
    195         SEL     a, a, dq1q0             ;// a = |q1 - q0| per lane
    196         USUB8   a, a, beta
    197         SEL     filt, m00, filt
    198 
    199         BEQ     bSLT4                   ;// Z still comes from "TST bS, #4": bit clear selects the bSLT4 kernel
    200 ;//-------------------Filter--------------------
    201 bSGE4
    202         ;//---------bSGE4 Execution---------------
    203         CMP     filt, #0                ;// used by BEQ NoFilterFilt0 two instructions below
    204 
    205         M_LDR   pThresholds, ppThresholds       ;// overwrites alpha (r6), which is no longer needed in this pass
    206 
    207         ;// Compute P0b = (p0 + q1 + 2*p1 + 2) >> 2 per lane, built from halving adds/subtracts
    208         UHADD8  t1, p_0, q_1            ;// (p0 + q1) >> 1
    209         BEQ     NoFilterFilt0           ;// every lane failed the alpha/beta checks
    210         MVN     t2, p_1                 ;// 255 - p1 per lane
    211         UHSUB8  t1, t1, t2              ;// halved (t1 + p1 - 255); off by 0x80 per lane until the EOR below
    212         USUB8   t2, filt, m01           ;// executed only for its GE flags (set where filt == 0x01); t2 is overwritten below
    213         EOR     t1, t1, m01, LSL #7     ;// flip bit 7 of every lane to remove the 0x80 offset
    214 
    215         ADD     pThresholds,pThresholds, #2     ;// this kernel reads no thresholds, but the pointer still advances by one pass
    216 
    217         ;// Compute Q0b = (q0 + p1 + 2*q1 + 2) >> 2 per lane, same construction
    218         UHADD8  t2, q_0, p_1
    219         MVN     t3, q_1                 ;// t3 is r14: m00 is dead from here on in this pass
    220         UHSUB8  t2, t2, t3
    221         M_STR   pThresholds, ppThresholds       ;// spill before r6 becomes P_0
    222         SEL     P_0, t1, p_0            ;// filtered value where filt is set, original p0 elsewhere
    223         EOR     t2, t2, m01, LSL #7
    224         SEL     Q_0, t2, q_0            ;// GE flags are still those of "USUB8 t2, filt, m01"
    225 
    226         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// q1 row -> p0 row, ready for the store
    227         B       StoreResultAndExit
    228 
    229 ;//---------- Exit of LoopX --------------
    230 ;//---- for the case of no filtering -----
    231 
    232 NoFilterFilt0
    233 NoFilterBS0
    234         M_LDR   pThresholds, ppThresholds
    235         SUB     pQ0, pQ0, srcdstStep, LSL #1
    236         SUB     pQ0, pQ0, srcdstStep    ;// together: q1 row -> p1 row
    237         ADD     pQ0, pQ0, #4            ;// next four pixels
    238         ADD     pThresholds, pThresholds, #2    ;// skip this pass's two threshold bytes
    239 
    240         ;// Load counter for LoopX
    241         M_LDRD  XY, pBS, pXYBS          ;// r8/r9 held q_0/q_1
    242         M_STR   pThresholds, ppThresholds
    243         M_LDRD  alpha, beta, pAlphaBeta0        ;// r6 held pThresholds
    244 
    245         ;// Advance the loop state
    246         ADDS    XY, XY, XY              ;// C = bit shifted out, Z = nothing left
    247         M_STR   XY, pXYBS
    248         BCC     LoopY                   ;// carry clear: second pass on the same edge
    249         B       ExitLoopY               ;// Z from the ADDS is tested there
    250 
    251 bSLT4
    252         ;//---------bSLT4 Execution---------------
    253         M_LDR   pThresholds, ppThresholds       ;// overwrites alpha (r6), which is no longer needed in this pass
    254         CMP     filt, #0                ;// used by BEQ NoFilterFilt0 below
    255 
    256         ;// Since beta <= 18 and alpha <= 255 we know
    257         ;// -254 <= p0-q0 <= 254
    258         ;//  -17 <= q1-q0 <= 17
    259         ;//  -17 <= p1-p0 <= 17
    260 
    261         ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
    262         ;//
    263         ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
    264         ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
    265         ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
    266 
    267         USUB8   t1, p_1, p_0            ;// p1 - p0 per lane
    268         USUB8   t2, q_1, q_0            ;// q1 - q0 per lane
    269         BEQ     NoFilterFilt0           ;// every lane failed the alpha/beta checks
    270 
    271         LDRB    tC0, [pThresholds],#1   ;// limit for lanes 0-1
    272         SSUB8   t1, t1, t2              ;// (p1-p0) - (q1-q0); must come before the next load, which overwrites r12 (t2)
    273         LDRB    tC1, [pThresholds],#1   ;// limit for lanes 2-3
    274         M_STR   pThresholds, ppThresholds       ;// spill before r6 becomes t4
    275         UHSUB8  t4, p_0, q_0            ;// halved difference (p0 - q0) >> 1 per lane
    276         ORR     tC, tC0, tC1, LSL #16   ;// tC = [00 tC1 00 tC0]
    277         USUB8   t5, p_0, q_0
    278         AND     t5, t5, m01             ;// low bit of (p0 - q0) per lane, i.e. what the halving above dropped
    279         SHSUB8  t1, t1, t5              ;// from here to the last SHSUB8: combines t1, t4, t5 into A without leaving 8-bit lanes (NOTE(review): exact rounding steps not re-derived - check against the formula above)
    280         ORR     tC, tC, LSL #8          ;// tC = [tC1 tC1 tC0 tC0]
    281         SSUB8   t1, t1, t5
    282         SHSUB8  t1, t1, t4
    283         UQADD8  tC, tC, m01             ;// limit + 1 per lane, saturating
    284         SADD8   t1, t1, m01
    285         USUB8   t5, filt, m01           ;// executed only for its GE flags (set where filt == 0x01)
    286         SHSUB8  t1, t1, t4
    287         SEL     tC, tC, m00             ;// limit becomes 0 in lanes that are not filtered
    288 
    289         ;// Split into positive and negative part and clip
    290 
    291         SSUB8   t1, t1, m00             ;// t1 - 0: value unchanged, GE set where t1 >= 0 (signed)
    292         SEL     pos, t1, m00            ;// pos = max(t1, 0)
    293         USUB8   neg, pos, t1            ;// neg = pos - t1 = max(-t1, 0)
    294         USUB8   t3, pos, tC             ;// GE set where pos >= tC
    295         SEL     pos, tC, pos            ;// pos = min(pos, tC)
    296         USUB8   t3, neg, tC
    297         SEL     neg, tC, neg            ;// neg = min(neg, tC)
    298         UQADD8  P_0, p_0, pos           ;// P_0 = p0 + pos - neg, saturating
    299         UQSUB8  Q_0, q_0, pos           ;// Q_0 = q0 - pos + neg, saturating
    300         UQSUB8  P_0, P_0, neg
    301         UQADD8  Q_0, Q_0, neg
    302 
    303         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// q1 row -> p0 row, ready for the store
    304 
    305         ;// Choose to store the filtered
    306         ;// value or the original pixel
    307         USUB8   t1, filt, m01           ;// GE set where filt == 0x01
    308         SEL     P_0, P_0, p_0
    309         SEL     Q_0, Q_0, q_0
    310 
    311 StoreResultAndExit
    312 
    313         ;//---------Store result---------------
    314 
    315         ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
    316         ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
    317 
    318         M_STR   P_0, [pQ0], srcdstStep  ;// write the p0 row, step down to the q0 row
    319         STR     Q_0, [pQ0], #4          ;// write the q0 row, advance four pixels
    320 
    321         M_LDRD  XY, pBS, pXYBS          ;// r8/r9 held q_0/q_1 (or neg)
    322         M_LDRD  alpha, beta, pAlphaBeta0        ;// r6/r7 held P_0/Q_0
    323 
    324         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// q0 row -> p1 row of the next four pixels
    325 
    326         ADDS    XY, XY, XY              ;// C = bit shifted out, Z = nothing left
    327         M_STR   XY, pXYBS
    328         BCC     LoopX                   ;// carry clear: second pass on the same edge
    329 
    330 ;//-------- Common Exit of LoopY -----------------
    331         ;// Move the pointers to the next edge (none of the instructions before BNE changes the Z flag left by ADDS)
    332 
    333 ExitLoopY
    334         ADD     pBS, pBS, #4            ;// skip four bS bytes; the register copy is used directly by the LDRH at LoopY
    335         M_LDRD  alpha, beta, pAlphaBeta1        ;// the next edge uses the index-1 alpha/beta
    336         SUB     pQ0, pQ0, #8            ;// back to the first column
    337         ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// four rows down
    338         M_STRD  alpha, beta, pAlphaBeta0        ;// so the per-pass reloads from pAlphaBeta0 now see them
    339 
    340         BNE     LoopY                   ;// XY not yet zero: filter the second edge
    341         MOV     r0, #OMX_Sts_NoErr      ;// return value
    342 
    343 ;//-----------------End Filter--------------------
    344         M_END
    345 
    346     ENDIF
    347 
    348         END
    349 
    350 
    351