Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13         INCLUDE omxtypes_s.h
     14         INCLUDE armCOMM_s.h
     15 
     16         M_VARIANTS ARM1136JS
     17 
     18 
     19         IF ARM1136JS
     20 
     21 
     22 MASK_0      EQU 0x00000000          ;// All-zero word: the "cleared lane" operand for SEL
     23 MASK_1      EQU 0x01010101          ;// 1 in every byte lane: byte replicator (via MUL) and per-lane filter flag
     24 MASK_2      EQU 0x0000ff00          ;// Keeps byte 1 of a word while pixel rows are transposed
        ;// LOOP_COUNT is shifted left once per processed position (ADDS XY, XY, XY).
        ;// A carry out ends the inner pass (BCC LoopX falls through) and a zero
        ;// result ends the outer pass (BNE LoopY falls through), so 0x50000000
        ;// gives 2 outer x 2 inner = 4 iterations.
     25 LOOP_COUNT  EQU 0x50000000
     26 
     27 ;// Declare input registers
        ;// (r0-r3 are read directly as the first four arguments; the last two
        ;//  arguments are fetched from the stack through M_ARG in the function.)
     28 
     29 pSrcDst     RN 0
     30 srcdstStep  RN 1
     31 pAlphaArg   RN 2
     32 pBetaArg    RN 3
     33 
        ;// NOTE: everything below is an alias, and several aliases share one
        ;// physical register (for example pThresholds/alpha/alpha0/t4/P_0 are
        ;// all r6).  Two names on the same register must never be live at the
        ;// same time, which is why the instruction order in the function matters.
     34 pThresholds RN 6
     35 pBS         RN 9
     36 pQ0         RN 0                    ;// same register as pSrcDst
     37 bS          RN 2
     38 bSTemp      RN 10
     39 
     40 alpha       RN 6                    ;// alpha currently in use (alpha0 lives in the same register)
     41 alpha0      RN 6
     42 alpha1      RN 8
     43 
     44 beta        RN 7                    ;// beta currently in use (beta0 lives in the same register)
     45 beta0       RN 7
     46 beta1       RN 9
     47 
     48 ;// Declare Local/Temporary variables
     49 
     50 ;// Pixels
        ;// Each register packs one pixel column for 4 rows, one byte per row,
        ;// with row 0 in the most significant byte.
     51 p_0         RN 3
     52 p_1         RN 5
     53 q_0         RN 8
     54 q_1         RN 9
     55 
     56 ;// Unpacking
     57 mask        RN 11
     58 
     59 row0        RN 2
     60 row1        RN 4
     61 row2        RN 5
     62 row3        RN 3
     63 
     64 row4        RN 8
     65 row5        RN 9
     66 row6        RN 10
     67 row7        RN 12
     68 
     69 tunpk0      RN 2
     70 tunpk2      RN 10
     71 tunpk3      RN 12
     72 
     73 tunpk4      RN 4
     74 tunpk5      RN 5
     75 tunpk6      RN 14
     76 tunpk7      RN 2
     77 
     78 ;// Filtering
     79 
     80 dp0q0       RN 12
     81 dp1p0       RN 12
     82 dq1q0       RN 12
     83 
     84 ap0q0       RN 4
     85 filt        RN 2                    ;// per-lane flag: 0x01 = filter this row, 0x00 = keep the pixel
     86 
     87 m00         RN 14                   ;// holds MASK_0
     88 m01         RN 11                   ;// holds MASK_1 (reuses the register of "mask")
     89 
     90 pQ0         RN 0                    ;// repeats the pQ0 declaration made earlier in this list
     91 Step        RN 1
     92 
     93 ;// Output
     94 
     95 P_0         RN 6
     96 Q_0         RN 7
     97 
     98 ;//Declarations for bSLT4 kernel
     99 
    100 tC          RN 12
    101 tC0         RN 5
    102 tC1         RN 12
    103 pos         RN 5
    104 neg         RN 9
    105 
    106 ;//Declarations for bSGE4 kernel
    107 
    108 
    109 ;// Miscellaneous
    110 XY          RN 8                    ;// loop counter, see LOOP_COUNT
    111 
    112 a           RN 10
    113 t1          RN 10
    114 t2          RN 12
    115 t3          RN 14
    116 t4          RN 6
    117 t5          RN 5
    118 
    119 
    120         ;// Allocate stack memory
                ;// (M_ALLOC4/M_ALLOC8 come from armCOMM_s.h; presumably "name, size"
                ;//  with 4- or 8-byte alignment -- confirm against the macro file.)
                ;// NOTE(review): pXYBS is declared with 4 bytes but is accessed with
                ;// M_STRD/M_LDRD (two registers, XY and pBS), while pBS on its own is
                ;// saved through ppBS.  This looks like it relies on ppBS being placed
                ;// directly after pXYBS -- confirm before reordering these lines.
    121         M_ALLOC4 ppThresholds,4
    122         M_ALLOC8 pAlphaBeta0,8
    123         M_ALLOC8 pAlphaBeta1,8
    124         M_ALLOC8 pXYBS,4
    125         M_ALLOC4 ppBS,4
    126 
    127         ;// Function header
                ;//
                ;// omxVCM4P10_FilterDeblockingChroma_VerEdge_I
                ;//
                ;// Visits four positions in a 2 x 2 pattern (see LOOP_COUNT): two
                ;// positions 4 bytes apart on the same rows, then the same two positions
                ;// 4 rows further down.  At each position it handles 4 rows at once,
                ;// reading the two bytes left of pQ0 (p1, p0) and the two bytes at pQ0
                ;// (q0, q1), and rewrites only p0 and q0 in the rows whose filter flag
                ;// survives the bS / alpha / beta checks.
                ;//
                ;// In:  r0 = pSrcDst (pQ0)  address of the q0 pixel of the first row
                ;//      r1 = srcdstStep     byte distance between rows
                ;//      r2 = pAlphaArg      two bytes: alpha[0], alpha[1]
                ;//      r3 = pBetaArg       two bytes: beta[0],  beta[1]
                ;//      stack: ppThresholdsArg (pointer to threshold bytes),
                ;//             ppBSArg         (pointer to bS bytes)
                ;// Out: r0 = OMX_Sts_NoErr  (the only value this routine returns)
                ;//
                ;// alpha[0]/beta[0] are used at the first position of each row group,
                ;// alpha[1]/beta[1] at the second one.
                ;//
                ;// NOTE(review): r12 and r14 are used as scratch below; M_START with
                ;// "r11" presumably saves r4-r11 and lr -- confirm in armCOMM_s.h.
    128         M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11
    129 
    130         ;//Input arguments on the stack
    131         M_ARG   ppThresholdsArg, 4
    132         M_ARG   ppBSArg, 4
    133 
    134         LDRB    alpha1, [pAlphaArg,#1]      ;// alpha[1]
    135         LDRB    beta1,  [pBetaArg,#1]       ;// beta[1]
    136         M_LDR   pThresholds, ppThresholdsArg
    137         LDR     a,=MASK_1
    138         LDRB    beta0,  [pBetaArg]          ;// beta[0]
                ;// Save the threshold pointer in a local: its register (r6) is about
                ;// to be reused for alpha0.
    139         M_STR   pThresholds, ppThresholds
    140         LDRB    alpha0, [pAlphaArg]         ;// alpha[0]
    141 
                ;// Replicate each 8-bit value into all four byte lanes (x * 0x01010101)
    142         MUL     alpha1, alpha1, a
    143         MUL     beta1, beta1, a
    144         MUL     alpha0, alpha0, a
    145         MUL     beta0, beta0, a
    146 
    147         M_STRD  alpha1, beta1, pAlphaBeta1
    148         M_LDR   pBS, ppBSArg
    149         M_STRD  alpha0, beta0, pAlphaBeta0
    150 
    151         LDR     XY,=LOOP_COUNT
    152         M_STRD  XY, pBS, pXYBS
    153 
    154 
                ;// LoopY and LoopX label the same address; which pass ended is decided
                ;// by the carry/zero flags of the counter shift, not by the entry point.
    155 LoopY
    156 LoopX
    157 ;//---------------Load Pixels-------------------
    158 
    159 ;//----------------Pack q0-q1-----------------------
    160         LDRH    bS, [pBS], #8               ;// two bS bytes for this position; pBS += 8
    161         LDR     mask, =MASK_2
    162 
    163         M_LDRH  row4, [pQ0], srcdstStep
    164         CMP     bS, #0                      ;// both bS bytes zero -> nothing to filter here
                ;// pBS must be saved now: its register (r9) is reused by row5 next.
    165         M_STR   pBS, ppBS
    166         M_LDRH  row5, [pQ0], srcdstStep
    167         BEQ.W   NoFilterBS0                 ;// uses the CMP above (the loads/stores in between leave the flags alone)
    168         LDRH    row6, [pQ0]
    169         LDRH    row7, [pQ0, srcdstStep]
    170 
                ;// Halfword loads on a little-endian layout: byte 0 is the pixel at
                ;// pQ0 (q0), byte 1 is the pixel at pQ0+1 (q1).
    171         ;// row4 = [0 0 r0q1 r0q0]
    172         ;// row5 = [0 0 r1q1 r1q0]
    173         ;// row6 = [0 0 r2q1 r2q0]
    174         ;// row7 = [0 0 r3q1 r3q0]
    175 
    176         AND     tunpk4, mask, row4          ;// byte 1 of row4 stays in byte 1
    177         AND     tunpk5, mask, row4, LSL#8   ;// byte 0 of row4 moved up to byte 1
    178         UXTAB   tunpk4, tunpk4, row5, ROR#8 ;// + byte 1 of row5 in byte 0
    179         UXTAB   tunpk5, tunpk5, row5        ;// + byte 0 of row5 in byte 0
    180         AND     tunpk6, mask, row6
    181         AND     tunpk7, mask, row6, LSL#8
    182         UXTAB   tunpk6, tunpk6, row7, ROR#8
    183         UXTAB   tunpk7, tunpk7, row7
    184 
    185         ;// tunpk4 = [0 0 r0q1 r1q1]
    186         ;// tunpk5 = [0 0 r0q0 r1q0]
    187         ;// tunpk6 = [0 0 r2q1 r3q1]
    188         ;// tunpk7 = [0 0 r2q0 r3q0]
    189 
                ;// Back to row 0 and two bytes to the left (the p1/p0 columns)
    190         SUB     pQ0, pQ0, srcdstStep, LSL #1
    191         SUB     pQ0, pQ0, #2
    192 
    193         PKHBT   q_1, tunpk6, tunpk4, LSL#16
    194         PKHBT   q_0, tunpk7, tunpk5, LSL#16
    195 
    196         ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
    197         ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
    198 
    199 
    200 ;//----------------Pack p0-p1-----------------------
    201 
    202         M_LDRH  row0, [pQ0], srcdstStep
    203         M_LDRH  row1, [pQ0], srcdstStep
    204         LDRH    row2, [pQ0]
    205         LDRH    row3, [pQ0, srcdstStep]
    206 
    207         ;// row0 = [0 0 r0p0 r0p1]
    208         ;// row1 = [0 0 r1p0 r1p1]
    209         ;// row2 = [0 0 r2p0 r2p1]
    210         ;// row3 = [0 0 r3p0 r3p1]
    211 
    212         AND     tunpk2, mask, row0
    213         AND     tunpk6, mask, row0, LSL#8
    214         UXTAB   tunpk2, tunpk2, row1, ROR#8
    215         UXTAB   tunpk6, tunpk6, row1
    216 
    217         AND     tunpk0, mask, row2
    218         AND     tunpk3, mask, row2, LSL#8
    219         UXTAB   tunpk0, tunpk0, row3, ROR#8
    220         UXTAB   tunpk3, tunpk3, row3
    221 
    222         ;// tunpk2 = [0 0 r0p0 r1p0]
    223         ;// tunpk6 = [0 0 r0p1 r1p1]
    224         ;// tunpk0 = [0 0 r2p0 r3p0]
    225         ;// tunpk3 = [0 0 r2p1 r3p1]
    226 
    227         PKHBT   p_0, tunpk0, tunpk2, LSL#16
    228         M_LDR   bSTemp, ppBS
    229         PKHBT   p_1, tunpk3, tunpk6, LSL#16
    230 
    231         ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
    232         ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
    233 
    234 ;//--------------Filtering Decision -------------------
                ;// Pattern used by all three checks below: USUB8 sets one GE bit per
                ;// byte lane (GE=1 when the lane did not borrow, i.e. x >= y) and SEL
                ;// then takes its first operand in lanes with GE=1, else its second.
    235         USUB8   dp0q0, p_0, q_0
    236         LDR     m01, =MASK_1
                ;// Reload the bS halfword read at the loop top (bS's register was
                ;// reused for pixels); ppBS holds the pointer after its +8 step.
    237         LDRH    bSTemp, [bSTemp ,#-8]
    238         MOV     m00, #MASK_0                ;//  00000000 mask
    239 
    240         MOV     filt, m01                   ;// start with all four rows enabled
    241         TST     bSTemp, #0xff00
    242         MOVEQ   filt, filt, LSL #16         ;// second bS byte is 0: drop rows 2-3 (low lanes)
    243         TST     bSTemp, #0xff
    244         MOVEQ   filt, filt, LSR #16         ;// first bS byte is 0: drop rows 0-1 (high lanes)
                ;// Z=1 when bit 2 of the first bS byte is clear.  This result is only
                ;// consumed by "BEQ bSLT4" further down; USUB8/SEL in between change
                ;// the GE bits but not Z.
    245         TST     bSTemp, #4
    246 
    247         ;// Check |p0-q0|<Alpha
    248         USUB8   a, q_0, p_0
    249         SEL     ap0q0, a, dp0q0             ;// ap0q0 = |p0-q0| per lane
    250         USUB8   a, ap0q0, alpha             ;// GE=1 where |p0-q0| >= alpha
    251         SEL     filt, m00, filt             ;// clear the flag in those lanes
    252 
    253         ;// Check |p1-p0|<Beta
    254         USUB8   dp1p0, p_1, p_0
    255         USUB8   a, p_0, p_1
    256         SEL     a, a, dp1p0
    257         USUB8   a, a, beta
    258         SEL     filt, m00, filt
    259 
    260         ;// Check |q1-q0|<Beta
    261         USUB8   dq1q0, q_1, q_0
    262         USUB8   a, q_0, q_1
    263         SEL     a, a, dq1q0
    264         USUB8   a, a, beta
    265         SEL     filt, m00, filt
    266 
    267         BEQ     bSLT4                       ;// flags still from "TST bSTemp, #4"
    268 ;//-------------------Filter--------------------
    269 bSGE4
    270         ;//---------bSGE4 Execution---------------
    271         CMP     filt, #0                    ;// Z=1 when no row is left to filter
    272 
    273         M_LDR   pThresholds, ppThresholds
    274 
    275         ;// Compute P0b
                ;// The halving add/sub pair plus the 0x80 flip evaluates
                ;// (p0 + q1 + 2*p1 + 2) >> 2 in each lane without leaving 8 bits.
    276         UHADD8  t1, p_0, q_1
    277         BEQ     NoFilterFilt0               ;// uses the CMP above
    278         MVN     t2, p_1
    279         UHSUB8  t1, t1, t2
                ;// Executed for its GE bits only (GE=1 where filt is set); t2 is
                ;// overwritten below, and the halving ops in between do not touch GE.
    280         USUB8   t2, filt, m01
    281         EOR     t1, t1, m01, LSL #7
    282 
                ;// Step past this position's thresholds (same +4 as the bSLT4 path)
    283         ADD     pThresholds,pThresholds, #4
    284 
    285         ;// Compute Q0b
                ;// Same form with the roles swapped: (q0 + p1 + 2*q1 + 2) >> 2
    286         UHADD8  t2, q_0, p_1
    287         MVN     t3, q_1
    288         UHSUB8  t2, t2, t3
                ;// Must be stored before the SEL below: P_0 shares r6 with pThresholds.
    289         M_STR   pThresholds, ppThresholds
    290         SEL     P_0, t1, p_0                ;// filtered value where filt is set, else original p0
    291         EOR     t2, t2, m01, LSL #7
    292         SEL     Q_0, t2, q_0
    293 
    294         B       StoreResultAndExit
    295 
    296 ;//---------- Exit of LoopX --------------
    297 ;//---- for the case of no filtering -----
    298 
                ;// NoFilterFilt0: pixels were loaded but every row failed a check.
                ;//                pQ0 is at row 2, two bytes left of the q0 column.
                ;// NoFilterBS0:   both bS bytes were zero.  pQ0 is at row 2, q0 column.
    299 NoFilterFilt0
    300         ADD     pQ0, pQ0, #2
    301 NoFilterBS0
    302         M_LDR   pThresholds, ppThresholds
    303         SUB     pQ0, pQ0, srcdstStep, LSL #1 ;// back to row 0
    304         ADD     pQ0, pQ0, #4                ;// next position, 4 bytes to the right
    305         ADD     pThresholds, pThresholds, #4
    306         ;// Load counter for LoopX
    307         M_LDRD  XY, pBS, pXYBS
    308         M_STR   pThresholds, ppThresholds
    309         M_LDRD  alpha, beta, pAlphaBeta1    ;// second position uses alpha[1]/beta[1]
    310 
    311         ;// Advance the loop counter (carry set = inner pass finished)
    312         ADDS    XY, XY, XY
    313         M_STR   XY, pXYBS
    314         BCC     LoopY
    315         B       ExitLoopY
    316 
    317 bSLT4
    318         ;//---------bSLT4 Execution---------------
    319         M_LDR   pThresholds, ppThresholds
    320         CMP     filt, #0                    ;// Z=1 when no row is left to filter
    321 
    322 
    323         ;// Since beta <= 18 and alpha <= 255 we know
    324         ;// -254 <= p0-q0 <= 254
    325         ;//  -17 <= q1-q0 <= 17
    326         ;//  -17 <= p1-p0 <= 17
    327 
    328         ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
    329         ;//
    330         ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
    331         ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
    332         ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
    333 
                ;// NOTE(review): the interleaved SSUB8/SHSUB8/SADD8 steps on t1 below
                ;// appear to evaluate A in 8-bit lanes using halving subtracts; the
                ;// individual steps were not re-derived for this comment.
                ;// Register reuse makes the order critical: tC0 overwrites p_1 (r5),
                ;// tC1 overwrites t2 (r12) and t4 overwrites pThresholds (r6).
    334         USUB8   t1, p_1, p_0
    335         USUB8   t2, q_1, q_0
    336         BEQ     NoFilterFilt0               ;// uses the CMP above
    337 
    338         LDRB    tC0, [pThresholds], #1      ;// first threshold byte
    339         SSUB8   t1, t1, t2
    340         LDRB    tC1, [pThresholds], #3      ;// second threshold byte; pointer advanced by 4 in total
    341         M_STR   pThresholds, ppThresholds
    342         UHSUB8  t4, p_0, q_0
    343         ORR     tC, tC1, tC0, LSL #16       ;// tC = [0 tC0 0 tC1]
    344         USUB8   t5, p_0, q_0
    345         AND     t5, t5, m01                 ;// low bit of (p0-q0) in each lane
    346         SHSUB8  t1, t1, t5
    347         ORR     tC, tC, LSL #8              ;// tC = [tC0 tC0 tC1 tC1]
    348         SSUB8   t1, t1, t5
    349         SHSUB8  t1, t1, t4
    350         UQADD8  tC, tC, m01                 ;// tC + 1 per lane, saturating
    351         SADD8   t1, t1, m01
    352         USUB8   t5, filt, m01               ;// for GE only: GE=1 where filt is set
    353         SHSUB8  t1, t1, t4
    354         SEL     tC, tC, m00                 ;// tC = 0 in rows that are not filtered
    355 
    356         ;// Split into positive and negative part and clip
    357 
    358         SSUB8   t1, t1, m00                 ;// t1 unchanged; GE=1 where t1 >= 0 (signed)
    359         SEL     pos, t1, m00                ;// pos = t1 where t1 >= 0, else 0
    360         USUB8   neg, pos, t1                ;// neg = -t1 where t1 < 0, else 0
    361         USUB8   t3, pos, tC                 ;// for GE only: pos >= tC
    362         SEL     pos, tC, pos                ;// pos = min(pos, tC)
    363         USUB8   t3, neg, tC                 ;// for GE only: neg >= tC
    364         SEL     neg, tC, neg                ;// neg = min(neg, tC)
    365         UQADD8  P_0, p_0, pos               ;// p0 + delta, q0 - delta, with unsigned saturation
    366         UQSUB8  Q_0, q_0, pos
    367         UQSUB8  P_0, P_0, neg
    368         UQADD8  Q_0, Q_0, neg
    369 
    370         ;// Choose to store the filtered
    371         ;// value or the original pixel
    372         USUB8   t1, filt, m01               ;// for GE only: GE=1 where filt is set
    373         SEL     P_0, P_0, p_0
    374         SEL     Q_0, Q_0, q_0
    375 
    376 StoreResultAndExit
    377 
    378         ;//---------Store result---------------
    379 
    380         ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
    381         ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
    382 
                ;// pQ0 enters at row 2, two bytes left of the q0 column (the p1
                ;// column).  Move to row 0 and to the p0 column; q0 is then at offset +1.
    383         SUB     pQ0, pQ0, srcdstStep, LSL #1
    384         ADD        pQ0, pQ0, #1
    385 
                ;// STRB writes the low byte, so each row's lane is shifted down first.
    386         MOV     t1, Q_0, LSR #24
    387         STRB    t1, [pQ0, #1]               ;// row 0: q0
    388         MOV     t1, P_0, LSR #24
    389         M_STRB  t1, [pQ0], srcdstStep       ;// row 0: p0, then step to row 1
    390 
    391         MOV     t1, Q_0, LSR #16
    392         STRB    t1, [pQ0, #1]               ;// row 1: q0
    393         MOV     t1, P_0, LSR #16
    394         M_STRB  t1, [pQ0], srcdstStep       ;// row 1: p0, then step to row 2
    395 
    396         MOV     t1, P_0, LSR #8
    397         STRB    t1, [pQ0]                   ;// row 2: p0
    398         STRB    P_0, [pQ0, srcdstStep]      ;// row 3: p0
    399         MOV     t1, Q_0, LSR #8
    400         STRB    t1, [pQ0, #1]!              ;// row 2: q0; writeback leaves pQ0 on the q0 column
    401         STRB    Q_0, [pQ0, srcdstStep]      ;// row 3: q0
    402 
    403         M_LDRD  XY, pBS, pXYBS
    404         M_LDRD  alpha, beta, pAlphaBeta1    ;// second position uses alpha[1]/beta[1]
    405 
    406         SUB     pQ0, pQ0, srcdstStep, LSL #1 ;// back to row 0
    407         ADD     pQ0, pQ0, #4                ;// next position, 4 bytes to the right
    408 
                ;// Advance the loop counter (carry set = inner pass finished)
    409         ADDS    XY, XY, XY
    410         M_STR   XY, pXYBS
    411         BCC     LoopX
    412 
    413 ;//-------- Common Exit of LoopY -----------------
    414         ;// Align the pointers
                ;// Two inner iterations moved pQ0 by +8, pBS by +16 and pThresholds
                ;// by +8.  Rewind so that the next pass starts 4 rows down, with pBS
                ;// and pThresholds each 2 bytes past where this pass started.
                ;// None of the instructions written out here sets the flags, so the
                ;// BNE at the end still tests the Z flag of the last ADDS on XY
                ;// (assuming the M_LDR/M_STR/M_LDRD macros leave the flags alone as
                ;// well -- confirm in armCOMM_s.h).
    415 
    416 ExitLoopY
    417 
    418         M_LDR   pThresholds, ppThresholds
    419         SUB     pQ0, pQ0, #8
    420         ADD     pQ0, pQ0, srcdstStep, LSL #2
    421         SUB     pBS, pBS, #14
    422         SUB     pThresholds, pThresholds, #6
    423         M_STR   pThresholds, ppThresholds
    424 
    425         M_LDRD  alpha, beta, pAlphaBeta0    ;// first position of the next pass uses alpha[0]/beta[0]
    426 
    427         BNE     LoopY                       ;// XY became 0 after the last shift -> all done
    428         MOV     r0, #OMX_Sts_NoErr
    429 ;//-----------------End Filter--------------------
    430 
    431         M_END
    432 
    433         ENDIF
    434 
    435         END
    436 
    437 
    438