Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 
     14         INCLUDE omxtypes_s.h
     15         INCLUDE armCOMM_s.h
     16 
     17         M_VARIANTS ARM1136JS
     18 
     19 
     20         IF ARM1136JS
     21 
     22 MASK_0      EQU 0x00000000      ;// all-zero word; selected by SEL to clear byte lanes
     23 MASK_1      EQU 0x01010101      ;// 0x01 in each of the four byte lanes
     24 LOOP_COUNT  EQU 0x50000000      ;// shift-register loop counter (see note below)
     25 
        ;// LOOP_COUNT is doubled with ADDS once per pass of the filter loop:
        ;//   0x50000000 -> 0xA0000000 (C=0) -> 0x40000000 (C=1, Z=0)
        ;//              -> 0x80000000 (C=0) -> 0x00000000 (C=1, Z=1)
        ;// Carry clear continues the inner loop, carry set leaves it, and a
        ;// zero result ends the outer loop: two inner passes for each of two
        ;// outer passes.
        ;//
        ;// The RN names below deliberately share physical registers; a name is
        ;// only valid while no other alias of the same register is live:
        ;//   r0  : pSrcDst, pQ0
        ;//   r1  : srcdstStep, Step
        ;//   r2  : pAlphaArg, filt
        ;//   r3  : pBetaArg, p_0
        ;//   r4  : ap0q0
        ;//   r5  : p_1, tC0, pos, t5
        ;//   r6  : pThresholds, alpha, alpha0, P_0, t4
        ;//   r7  : beta, beta0, Q_0
        ;//   r8  : alpha1, q_0, XY
        ;//   r9  : pBS, beta1, q_1, neg
        ;//   r10 : bS, a, t1
        ;//   r11 : m01
        ;//   r12 : dp0q0, dp1p0, dq1q0, tC, tC1, t2
        ;//   r14 : m00, t3
     26 ;// Declare input registers
     27 
     28 pSrcDst     RN 0
     29 srcdstStep  RN 1
     30 pAlphaArg   RN 2
     31 pBetaArg    RN 3
     32 
     33 pThresholds RN 6
     34 pBS         RN 9
     35 pQ0         RN 0
     36 bS          RN 10
     37 
     38 alpha       RN 6
     39 alpha0      RN 6
     40 alpha1      RN 8
     41 
     42 beta        RN 7
     43 beta0       RN 7
     44 beta1       RN 9
     45 
     46 ;// Declare Local/Temporary variables
     47 
     48 ;// Pixels
     49 p_0         RN 3
     50 p_1         RN 5
     51 q_0         RN 8
     52 q_1         RN 9
     53 
     54 ;// Filtering
     55 
     56 dp0q0       RN 12
     57 dp1p0       RN 12
     58 dq1q0       RN 12
     59 
     60 ap0q0       RN 4
     61 filt        RN 2
     62 
     63 m00         RN 14
     64 m01         RN 11
     65 
     66 pQ0         RN 0                ;// same binding as the earlier pQ0 declaration
     67 Step        RN 1
     68 
     69 ;// Output
     70 
     71 P_0         RN 6
     72 Q_0         RN 7
     73 
     74 ;//Declarations for bSLT4 kernel
     75 
     76 tC          RN 12
     77 tC0         RN 5
     78 tC1         RN 12
     79 pos         RN 5
     80 neg         RN 9
     81 
     82 ;//Declarations for bSGE4 kernel
     83 
     84 
     85 ;// Miscellaneous
     86 XY          RN 8
     87 
     88 a           RN 10
     89 t1          RN 10
     90 t2          RN 12
     91 t3          RN 14
     92 t4          RN 6
     93 t5          RN 5
     94 
     95 
     96         ;// Allocate stack memory
        ;// NOTE(review): M_ALLOC4/M_ALLOC8 come from armCOMM_s.h; presumably the
        ;// digit is the slot alignment and the second operand its size - confirm.
        ;// pXYBS is declared with size 4 yet is accessed with M_STRD/M_LDRD
        ;// (XY, pBS), while ppBS is written on its own with M_STR. This looks
        ;// like it relies on ppBS being placed directly after pXYBS so that the
        ;// doubleword access covers both slots - confirm before reordering.
     97         M_ALLOC4 ppThresholds,4
     98         M_ALLOC8 pAlphaBeta0,8
     99         M_ALLOC8 pAlphaBeta1,8
    100         M_ALLOC8 pXYBS,4
    101         M_ALLOC4 ppBS,4
    102 
        ;//----------------------------------------------------------------------
        ;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
        ;//
        ;// In (registers, named by the RN aliases of this file):
        ;//   r0 = pSrcDst     pixel pointer; rows above it are read and written
        ;//   r1 = srcdstStep  byte distance between consecutive rows
        ;//   r2 = pAlphaArg   two alpha bytes are read, [0] and [1]
        ;//   r3 = pBetaArg    two beta bytes are read, [0] and [1]
        ;// In (stack): ppThresholdsArg, ppBSArg
        ;//   The loaded values are used directly as byte pointers to the
        ;//   threshold (tC0) bytes and to the bS bytes.
        ;// Out: r0 = OMX_Sts_NoErr
        ;//
        ;// Each pass handles one 32-bit word, i.e. four byte lanes, taken from
        ;// each of four consecutive rows (p1, p0, q0, q1), consumes two bS
        ;// bytes and two threshold bytes, and rewrites the p0 and q0 rows.
        ;// Two passes (8 bytes across) form one edge; the second edge lies
        ;// 4 rows further down, skips 4 bS bytes and uses alpha[1]/beta[1].
        ;//
        ;// Flag discipline: the byte-wise SIMD instructions (USUB8, SSUB8,
        ;// SADD8) write only the GE flags consumed by SEL; the N/Z/C/V results
        ;// of CMP/TST/ADDS are kept alive across them and tested much later.
        ;// Do not reorder instructions without re-checking both flag sets.
        ;//----------------------------------------------------------------------
    103         ;// Function header
    104         M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11
    105 
    106         ;//Input arguments on the stack
    107         M_ARG   ppThresholdsArg, 4
    108         M_ARG   ppBSArg, 4
    109 
    110         LDRB    alpha1, [pAlphaArg,#1]         ;// alpha for the second edge
    111         LDRB    beta1,  [pBetaArg,#1]          ;// beta for the second edge
    112         M_LDR   pThresholds, ppThresholdsArg
    113         LDR     a,=MASK_1
    114         LDRB    beta0,  [pBetaArg]             ;// beta for the first edge (overwrites pBetaArg use is done: r3 still intact, beta0 is r7)
    115         M_STR   pThresholds, ppThresholds      ;// save before r6 is reused as alpha0
    116         LDRB    alpha0, [pAlphaArg]            ;// alpha for the first edge (r6)
    117 
        ;// Multiplying a byte value by 0x01010101 replicates it into all
        ;// four byte lanes.
    118         MUL     alpha1, alpha1, a
    119         MUL     beta1, beta1, a
    120         MUL     alpha0, alpha0, a
    121         MUL     beta0, beta0, a
    122 
    123         M_STRD  alpha1, beta1, pAlphaBeta1
    124         M_LDR   pBS, ppBSArg                   ;// r9 is free again once beta1 is stored
    125         M_STRD  alpha0, beta0, pAlphaBeta0
    126 
    127         LDR     XY,=LOOP_COUNT
    128         M_STRD  XY, pBS, pXYBS                 ;// loop counter and bS pointer saved together
    129 
    130         SUB     pQ0, pQ0, srcdstStep, LSL #1   ;// step back two rows: start at the p1 row
    131 LoopY
    132 LoopX
    133 ;//---------------Load Pixels-------------------
    134         LDRH    bS, [pBS], #2                  ;// two bS bytes for this pass
    135 
    136         M_STR   pBS, ppBS                      ;// persist the advanced bS pointer
    137         M_LDR   p_1, [pQ0],srcdstStep
    138 
    139         CMP     bS, #0                         ;// Z is consumed by BEQ NoFilterBS0 below
    140 
    141         M_LDR   p_0, [pQ0],srcdstStep
    142         M_LDR   q_0, [pQ0],srcdstStep
    143         M_LDR   q_1, [pQ0]                     ;// pQ0 is left on the q1 row
    144         LDR     m01, =MASK_1                ;//  01010101 mask
    145         BEQ     NoFilterBS0                    ;// both bS bytes zero: nothing to filter
    146 
    147 
    148         ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
    149         ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
    150         ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
    151         ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
    152 
    153 ;//--------------Filtering Decision -------------------
    154         MOV     m00, #MASK_0                ;//  00000000 mask
    155 
        ;// filt holds 0x01 in every lane that may be filtered. The upper bS
        ;// byte gates lanes 2-3 and the lower bS byte gates lanes 0-1.
    156         MOV     filt, m01
    157         TST     bS, #0xff00
    158         MOVEQ   filt, filt, LSR #16            ;// upper bS byte zero: keep lanes 0-1 only
    159         TST     bS, #0xff
    160         MOVEQ   filt, filt, LSL #16            ;// lower bS byte zero: keep lanes 2-3 only
    161         TST     bS, #4                         ;// Z is consumed by BEQ bSLT4 below; only bit 2
        ;// of the lower bS byte is examined. bS (r10) is overwritten as 'a'
        ;// from here on.
    162 
    163 
    164         ;// Check |p0-q0|<Alpha
        ;// Two opposite USUB8s plus SEL give the per-lane absolute difference;
        ;// USUB8 against the threshold sets GE where the difference is >=
        ;// threshold, and SEL then zeroes those lanes of filt.
    165         USUB8   dp0q0, p_0, q_0
    166         USUB8   a, q_0, p_0
    167         SEL     ap0q0, a, dp0q0
    168         USUB8   a, ap0q0, alpha
    169         SEL     filt, m00, filt
    170 
    171         ;// Check |p1-p0|<Beta
    172         USUB8   dp1p0, p_1, p_0
    173         USUB8   a, p_0, p_1
    174         SEL     a, a, dp1p0
    175         USUB8   a, a, beta
    176         SEL     filt, m00, filt
    177 
    178         ;// Check |q1-q0|<Beta
    179         USUB8   dq1q0, q_1, q_0
    180         USUB8   a, q_0, q_1
    181         SEL     a, a, dq1q0
    182         USUB8   a, a, beta
    183         SEL     filt, m00, filt
    184 
    185         BEQ     bSLT4                          ;// Z still comes from TST bS, #4
    186 ;//-------------------Filter--------------------
    187 bSGE4
    188         ;//---------bSGE4 Execution---------------
    189         CMP     filt, #0                       ;// Z is consumed by BEQ NoFilterFilt0 below
    190 
    191         M_LDR   pThresholds, ppThresholds      ;// r6: alpha is no longer needed in this pass
    192 
    193         ;// Compute P0b
        ;// Per lane: t1 = (p0+q1)>>1, then (t1 - ~p1)>>1 with the top bit
        ;// flipped, which evaluates to (p0 + q1 + 2*p1 + 2)>>2.
    194         UHADD8  t1, p_0, q_1
    195         BEQ     NoFilterFilt0                  ;// no lane passed the alpha/beta tests
    196         MVN     t2, p_1
    197         UHSUB8  t1, t1, t2
    198         USUB8   t2, filt, m01                  ;// result unused; sets GE in the enabled lanes
    199         EOR     t1, t1, m01, LSL #7
    200 
    201         ADD     pThresholds,pThresholds, #2    ;// this path reads no thresholds; skip two bytes
    202 
    203         ;// Compute Q0b
        ;// Same form as P0b with the roles of p and q exchanged.
    204         UHADD8  t2, q_0, p_1
    205         MVN     t3, q_1                        ;// t3 is r14, so m00 is overwritten here
    206         UHSUB8  t2, t2, t3
    207         M_STR   pThresholds, ppThresholds      ;// must precede SEL P_0: P_0 is also r6
    208         SEL     P_0, t1, p_0                   ;// filtered value in enabled lanes, else original
    209         EOR     t2, t2, m01, LSL #7
    210         SEL     Q_0, t2, q_0
    211 
    212         SUB     pQ0, pQ0, srcdstStep, LSL #1   ;// from the q1 row back to the p0 row
    213         B       StoreResultAndExit
    214 
    215 ;//---------- Exit of LoopX --------------
    216 ;//---- for the case of no filtering -----
    217 
    218 NoFilterFilt0
    219 NoFilterBS0
    220         M_LDR   pThresholds, ppThresholds
        ;// pQ0 is on the q1 row: go back three rows to the p1 row and move
        ;// 4 bytes to the right for the next pass.
    221         SUB     pQ0, pQ0, srcdstStep, LSL #1
    222         SUB     pQ0, pQ0, srcdstStep
    223         ADD     pQ0, pQ0, #4
    224         ADD     pThresholds, pThresholds, #2   ;// skip the two threshold bytes of this pass
    225 
    226         ;// Load counter for LoopX
    227         M_LDRD  XY, pBS, pXYBS
    228         M_STR   pThresholds, ppThresholds      ;// must precede the reload of alpha into r6
    229         M_LDRD  alpha, beta, pAlphaBeta0
    230 
    231         ;// Advance the shift-register loop counter; carry out ends the inner loop
    232         ADDS    XY, XY, XY
    233         M_STR   XY, pXYBS
    234         BCC     LoopY                          ;// LoopY and LoopX label the same address
    235         B       ExitLoopY                      ;// N/Z/C from ADDS are still live there
    236 
    237 bSLT4
    238         ;//---------bSLT4 Execution---------------
    239         M_LDR   pThresholds, ppThresholds      ;// r6: alpha is no longer needed in this pass
    240         CMP     filt, #0                       ;// Z is consumed by BEQ NoFilterFilt0 below
    241 
    242         ;// Since beta <= 18 and alpha <= 255 we know
    243         ;// -254 <= p0-q0 <= 254
    244         ;//  -17 <= q1-q0 <= 17
    245         ;//  -17 <= p1-p0 <= 17
    246 
    247         ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
    248         ;//
    249         ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
    250         ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
    251         ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
    252 
    253         USUB8   t1, p_1, p_0                   ;// t1 = p1-p0 per lane
    254         USUB8   t2, q_1, q_0                   ;// t2 = q1-q0 per lane
    255         BEQ     NoFilterFilt0                  ;// no lane passed the alpha/beta tests
    256 
        ;// From here the instruction order matters for register reuse:
        ;// tC0 is r5 (p_1 is dead), tC1 is r12 (t2 must be consumed first) and
        ;// t4 is r6 (pThresholds must be stored first).
    257         LDRB    tC0, [pThresholds],#1
    258         SSUB8   t1, t1, t2                     ;// (p1-p0) - (q1-q0)
    259         LDRB    tC1, [pThresholds],#1
    260         M_STR   pThresholds, ppThresholds
    261         UHSUB8  t4, p_0, q_0                   ;// halved (p0-q0)
    262         ORR     tC, tC0, tC1, LSL #16
    263         USUB8   t5, p_0, q_0
    264         AND     t5, t5, m01                    ;// low bit of (p0-q0) in each lane
    265         SHSUB8  t1, t1, t5
    266         ORR     tC, tC, LSL #8                 ;// lanes become [tC1 tC1 tC0 tC0]
    267         SSUB8   t1, t1, t5
    268         SHSUB8  t1, t1, t4
    269         UQADD8  tC, tC, m01                    ;// clip bound is the threshold byte + 1, saturating
    270         SADD8   t1, t1, m01
    271         USUB8   t5, filt, m01                  ;// result unused; sets GE in the enabled lanes
    272         SHSUB8  t1, t1, t4
    273         SEL     tC, tC, m00                    ;// clip bound forced to 0 in disabled lanes
    274 
    275         ;// Split into positive and negative part and clip
        ;// pos = max(t1,0) and neg = max(-t1,0) per lane, each limited to tC.
    276 
    277         SSUB8   t1, t1, m00                    ;// subtract 0: sets GE where t1 >= 0 (signed)
    278         SEL     pos, t1, m00
    279         USUB8   neg, pos, t1
    280         USUB8   t3, pos, tC                    ;// GE where pos >= tC
    281         SEL     pos, tC, pos
    282         USUB8   t3, neg, tC                    ;// GE where neg >= tC
    283         SEL     neg, tC, neg
    284         UQADD8  P_0, p_0, pos                  ;// all four updates saturate to 0..255
    285         UQSUB8  Q_0, q_0, pos
    286         UQSUB8  P_0, P_0, neg
    287         UQADD8  Q_0, Q_0, neg
    288 
    289         SUB     pQ0, pQ0, srcdstStep, LSL #1   ;// from the q1 row back to the p0 row
    290 
    291         ;// Choose to store the filtered
    292         ;// value or the original pixel
    293         USUB8   t1, filt, m01                  ;// result unused; sets GE in the enabled lanes
    294         SEL     P_0, P_0, p_0
    295         SEL     Q_0, Q_0, q_0
    296 
    297 StoreResultAndExit
    298 
    299         ;//---------Store result---------------
    300 
    301         ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
    302         ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
    303 
    304         M_STR   P_0, [pQ0], srcdstStep         ;// write the p0 row, move to the q0 row
    305         STR     Q_0, [pQ0], #4                 ;// write the q0 row, move 4 bytes right
    306 
    307         M_LDRD  XY, pBS, pXYBS
    308         M_LDRD  alpha, beta, pAlphaBeta0       ;// r6/r7 held P_0/Q_0; restore alpha/beta
    309 
    310         SUB     pQ0, pQ0, srcdstStep, LSL #1   ;// back to the p1 row for the next pass
    311 
    312         ADDS    XY, XY, XY                     ;// advance loop counter; C and Z are used below
    313         M_STR   XY, pXYBS
    314         BCC     LoopX
    315 
    316 ;//-------- Common Exit of LoopY -----------------
    317         ;// Align the pointers
        ;// None of the instructions up to BNE are flag-setting forms, so Z
        ;// still reflects the ADDS on the loop counter.
    318 
    319 ExitLoopY
    320         ADD     pBS, pBS, #4                   ;// skip 4 bS bytes; saved at the top of the loop
    321         M_LDRD  alpha, beta, pAlphaBeta1
    322         SUB     pQ0, pQ0, #8                   ;// undo the two 4-byte column advances
    323         ADD     pQ0, pQ0, srcdstStep, LSL #2   ;// move down 4 rows to the next edge
    324         M_STRD  alpha, beta, pAlphaBeta0       ;// later reloads now yield alpha[1]/beta[1]
    325 
    326         BNE     LoopY                          ;// counter not yet zero: process the second edge
    327         MOV     r0, #OMX_Sts_NoErr
    328 
    329 ;//-----------------End Filter--------------------
    330         M_END
    331 
    332     ENDIF
    333 
    334         END
    335 
    336 
    337