Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         M_VARIANTS ARM1136JS
     31 
     32 
     33         IF ARM1136JS
     34 
     35 
     36 MASK_0      EQU 0x00000000      ;// All-zero word; moved into m00 and used as the zero operand of SEL
     37 MASK_1      EQU 0x01010101      ;// 1 in every byte lane: replication multiplier (MUL) and per-lane filter flag (m01/filt)
     38 MASK_2      EQU 0x0000ff00      ;// Keeps bits [15:8] of a word; used while transposing the loaded halfword rows
     39 LOOP_COUNT  EQU 0x50000000      ;// Loop counter XY: doubled with ADDS each pass; carry-out ends the inner pass pair, zero ends the outer loop
     40 
     41 ;// Declare input registers (the four register arguments, r0-r3)
     42 ;// NOTE: the RN names below alias heavily; several names map to the same register and are only valid in disjoint parts of the loop
     43 pSrcDst     RN 0
     44 srcdstStep  RN 1
     45 pAlphaArg   RN 2
     46 pBetaArg    RN 3
     47 
     48 pThresholds RN 6                ;// shares r6 with alpha/alpha0/P_0/t4, so it is kept in the ppThresholds stack slot
     49 pBS         RN 9                ;// shares r9 with beta1/q_1/row5/neg, so it is kept in the ppBS stack slot
     50 pQ0         RN 0                ;// working pixel pointer; same register as pSrcDst
     51 bS          RN 2                ;// shares r2 with pAlphaArg/row0/tunpk0/tunpk7/filt
     52 bSTemp      RN 10               ;// shares r10 with a/t1/row6/tunpk2
     53 
     54 alpha       RN 6
     55 alpha0      RN 6
     56 alpha1      RN 8
     57 
     58 beta        RN 7
     59 beta0       RN 7
     60 beta1       RN 9
     61 
     62 ;// Declare Local/Temporary variables
     63 
     64 ;// Pixels (four rows packed one byte per lane)
     65 p_0         RN 3
     66 p_1         RN 5
     67 q_0         RN 8
     68 q_1         RN 9
     69 
     70 ;// Unpacking
     71 mask        RN 11               ;// holds MASK_2 during unpacking; r11 is later reloaded as m01
     72 
     73 row0        RN 2
     74 row1        RN 4
     75 row2        RN 5
     76 row3        RN 3
     77 
     78 row4        RN 8
     79 row5        RN 9
     80 row6        RN 10
     81 row7        RN 12
     82 
     83 tunpk0      RN 2
     84 tunpk2      RN 10
     85 tunpk3      RN 12
     86 
     87 tunpk4      RN 4
     88 tunpk5      RN 5
     89 tunpk6      RN 14
     90 tunpk7      RN 2
     91 
     92 ;// Filtering
     93 
     94 dp0q0       RN 12
     95 dp1p0       RN 12
     96 dq1q0       RN 12
     97 
     98 ap0q0       RN 4
     99 filt        RN 2                ;// per-lane filter enable: 0x01 = filter this row, 0x00 = keep original
    100 
    101 m00         RN 14               ;// MASK_0; r14 is also t3/tunpk6, so m00 is only valid until t3 is written
    102 m01         RN 11               ;// MASK_1
    103 
    104 pQ0         RN 0                ;// repeats the pQ0 declaration made earlier (same register)
    105 Step        RN 1                ;// same register as srcdstStep; not referenced in the code visible in this file
    106 
    107 ;// Output
    108 
    109 P_0         RN 6
    110 Q_0         RN 7
    111 
    112 ;//Declarations for bSLT4 kernel
    113 
    114 tC          RN 12
    115 tC0         RN 5
    116 tC1         RN 12
    117 pos         RN 5
    118 neg         RN 9
    119 
    120 ;//Declarations for bSGE4 kernel
    121 
    122 
    123 ;// Miscellaneous
    124 XY          RN 8                ;// loop counter; shares r8 with alpha1/q_0/row4, so it is kept in the pXYBS stack slot
    125 
    126 a           RN 10
    127 t1          RN 10
    128 t2          RN 12
    129 t3          RN 14
    130 t4          RN 6
    131 t5          RN 5
    132 
    133 
    134         ;// Allocate stack memory
    135         M_ALLOC4 ppThresholds,4         ;// running pThresholds pointer
    136         M_ALLOC8 pAlphaBeta0,8          ;// lane-replicated alpha/beta built from pAlpha[0]/pBeta[0]
    137         M_ALLOC8 pAlphaBeta1,8          ;// lane-replicated alpha/beta built from pAlpha[1]/pBeta[1]
    138         M_ALLOC8 pXYBS,4                ;// loop counter XY (read/written as a register pair together with pBS, see NOTE(review) below)
    139         M_ALLOC4 ppBS,4                 ;// running pBS pointer
    140 
    141         ;// Function header
                ;//
                ;// omxVCM4P10_FilterDeblockingChroma_VerEdge_I
                ;//   Filters, in place, the p0/q0 pixel pair on either side of two vertical
                ;//   edges (at byte columns +0 and +4 from pSrcDst) over 8 rows, processed as
                ;//   2 row-groups x 2 edges, 4 rows per pass.
                ;//
                ;// Register arguments:
                ;//   r0  pSrcDst     address of q0 in the first row (p0 is at -1, p1 at -2, q1 at +1)
                ;//   r1  srcdstStep  byte distance between consecutive rows
                ;//   r2  pAlpha      two bytes: [0] used for the edge at +0, [1] for the edge at +4
                ;//   r3  pBeta       two bytes, same indexing as pAlpha
                ;// Stack arguments:
                ;//   pThresholds     bytes read only on the bS<4 path; per row-group g and edge e the
                ;//                   code reads offsets 2*g + 4*e and 2*g + 4*e + 1
                ;//   pBS             one halfword (two bS bytes) per pass at offset 2*g + 8*e
                ;// Returns:
                ;//   r0 = OMX_Sts_NoErr (no argument validation is performed in this routine)
                ;//
                ;// NOTE(review): the "r11" operand of M_START presumably names the highest
                ;// callee-saved register to preserve; the macro lives in armCOMM_s.h -- confirm there.
    142         M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11
    143 
    144         ;//Input arguments on the stack
    145         M_ARG   ppThresholdsArg, 4
    146         M_ARG   ppBSArg, 4
    147 
    148         LDRB    alpha1, [pAlphaArg,#1]          ;// alpha for the edge at +4
    149         LDRB    beta1,  [pBetaArg,#1]           ;// beta for the edge at +4
    150         M_LDR   pThresholds, ppThresholdsArg
    151         LDR     a,=MASK_1                       ;// a = 0x01010101, byte-replication multiplier
    152         LDRB    beta0,  [pBetaArg]              ;// beta for the edge at +0
    153         M_STR   pThresholds, ppThresholds       ;// save before r6 is reused for alpha0
    154         LDRB    alpha0, [pAlphaArg]             ;// alpha for the edge at +0 (overwrites pThresholds in r6)
    155 ;// Replicate each 8-bit threshold into all four byte lanes
    156         MUL     alpha1, alpha1, a
    157         MUL     beta1, beta1, a
    158         MUL     alpha0, alpha0, a
    159         MUL     beta0, beta0, a
    160 
    161         M_STRD  alpha1, beta1, pAlphaBeta1      ;// spill: r8/r9 are reused for XY and pBS just below
    162         M_LDR   pBS, ppBSArg
    163         M_STRD  alpha0, beta0, pAlphaBeta0      ;// alpha0/beta0 also stay live in r6/r7 for the first pass
    164 
    165         LDR     XY,=LOOP_COUNT
    166         M_STRD  XY, pBS, pXYBS                  ;// NOTE(review): pXYBS is declared with size 4; the second word of this pair presumably lands in ppBS -- confirm against the M_ALLOC macros
    167 
    168 
    169 LoopY                                   ;// outer loop entry (next group of 4 rows); same address as LoopX
    170 LoopX                                   ;// inner loop entry (next edge, 4 bytes to the right)
    171 ;//---------------Load Pixels-------------------
    172 ;// On entry: pQ0 -> q0 of the first row of this pass, alpha/beta (r6/r7) hold this edge's thresholds, pBS (r9) is live
    173 ;//----------------Pack q0-q1-----------------------
    174         LDRH    bS, [pBS], #8                   ;// two bS bytes for this pass (low byte gates rows 0-1, high byte rows 2-3); pBS += 8
    175         LDR     mask, =MASK_2
    176 
    177         M_LDRH  row4, [pQ0], srcdstStep
    178         CMP     bS, #0                          ;// Z consumed by BEQ.W below
    179         M_STR   pBS, ppBS                       ;// save advanced pBS: r9 is about to be reused as row5
    180         M_LDRH  row5, [pQ0], srcdstStep
    181         BEQ.W   NoFilterBS0                     ;// both bS bytes zero: skip this pass (pQ0 is 2 rows ahead here)
    182         LDRH    row6, [pQ0]
    183         LDRH    row7, [pQ0, srcdstStep]
    184         ;// Layout below uses the same convention as the p-side comments: the byte loaded from the higher address sits in bits [15:8]
    185         ;// row4 = [0 0 r0q1 r0q0]
    186         ;// row5 = [0 0 r1q1 r1q0]
    187         ;// row6 = [0 0 r2q1 r2q0]
    188         ;// row7 = [0 0 r3q1 r3q0]
    189 
    190         AND     tunpk4, mask, row4
    191         AND     tunpk5, mask, row4, LSL#8
    192         UXTAB   tunpk4, tunpk4, row5, ROR#8
    193         UXTAB   tunpk5, tunpk5, row5
    194         AND     tunpk6, mask, row6
    195         AND     tunpk7, mask, row6, LSL#8
    196         UXTAB   tunpk6, tunpk6, row7, ROR#8
    197         UXTAB   tunpk7, tunpk7, row7
    198 
    199         ;// tunpk4 = [0 0 r0q1 r1q1]
    200         ;// tunpk5 = [0 0 r0q0 r1q0]
    201         ;// tunpk6 = [0 0 r2q1 r3q1]
    202         ;// tunpk7 = [0 0 r2q0 r3q0]
    203 
    204         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to the first row of this pass
    205         SUB     pQ0, pQ0, #2                    ;// step left to p1 so one halfword load fetches p1 and p0
    206 
    207         PKHBT   q_1, tunpk6, tunpk4, LSL#16
    208         PKHBT   q_0, tunpk7, tunpk5, LSL#16
    209 
    210         ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
    211         ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
    212 
    213 
    214 ;//----------------Pack p0-p1-----------------------
    215 
    216         M_LDRH  row0, [pQ0], srcdstStep
    217         M_LDRH  row1, [pQ0], srcdstStep
    218         LDRH    row2, [pQ0]
    219         LDRH    row3, [pQ0, srcdstStep]         ;// pQ0 stays at (first row + 2 rows) - 2 until the store/skip code rewinds it
    220 
    221         ;// row0 = [0 0 r0p0 r0p1]
    222         ;// row1 = [0 0 r1p0 r1p1]
    223         ;// row2 = [0 0 r2p0 r2p1]
    224         ;// row3 = [0 0 r3p0 r3p1]
    225 
    226         AND     tunpk2, mask, row0
    227         AND     tunpk6, mask, row0, LSL#8
    228         UXTAB   tunpk2, tunpk2, row1, ROR#8
    229         UXTAB   tunpk6, tunpk6, row1
    230 
    231         AND     tunpk0, mask, row2
    232         AND     tunpk3, mask, row2, LSL#8
    233         UXTAB   tunpk0, tunpk0, row3, ROR#8
    234         UXTAB   tunpk3, tunpk3, row3
    235 
    236         ;// tunpk2 = [0 0 r0p0 r1p0]
    237         ;// tunpk6 = [0 0 r0p1 r1p1]
    238         ;// tunpk0 = [0 0 r2p0 r3p0]
    239         ;// tunpk3 = [0 0 r2p1 r3p1]
    240 
    241         PKHBT   p_0, tunpk0, tunpk2, LSL#16
    242         M_LDR   bSTemp, ppBS                    ;// saved pBS (already advanced by 8)
    243         PKHBT   p_1, tunpk3, tunpk6, LSL#16
    244 
    245         ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
    246         ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
    247 
    248 ;//--------------Filtering Decision -------------------
    249         USUB8   dp0q0, p_0, q_0
    250         LDR     m01, =MASK_1                    ;// r11 switches role from mask to m01
    251         LDRH    bSTemp, [bSTemp ,#-8]           ;// re-read this pass's bS pair: the bS register (r2) was reused during unpacking
    252         MOV     m00, #MASK_0                ;//  00000000 mask
    253 
    254         MOV     filt, m01                       ;// start with all four rows enabled
    255         TST     bSTemp, #0xff00
    256         MOVEQ   filt, filt, LSL #16             ;// high bS byte == 0: clear the two low lanes (rows 2-3)
    257         TST     bSTemp, #0xff
    258         MOVEQ   filt, filt, LSR #16             ;// low bS byte == 0: clear the two high lanes (rows 0-1)
    259         TST     bSTemp, #4                      ;// bit 2 of the low bS byte; Z is consumed by "BEQ bSLT4" below (USUB8/SEL only touch GE)
    260 
    261         ;// Check |p0-q0|<Alpha
    262         USUB8   a, q_0, p_0                     ;// GE set in lanes where q0 >= p0
    263         SEL     ap0q0, a, dp0q0                 ;// per-lane |p0-q0|
    264         USUB8   a, ap0q0, alpha                 ;// GE set in lanes where |p0-q0| >= alpha
    265         SEL     filt, m00, filt                 ;// clear filt in those lanes
    266 
    267         ;// Check |p1-p0|<Beta
    268         USUB8   dp1p0, p_1, p_0
    269         USUB8   a, p_0, p_1
    270         SEL     a, a, dp1p0                     ;// per-lane |p1-p0|
    271         USUB8   a, a, beta
    272         SEL     filt, m00, filt                 ;// clear filt where |p1-p0| >= beta
    273 
    274         ;// Check |q1-q0|<Beta
    275         USUB8   dq1q0, q_1, q_0
    276         USUB8   a, q_0, q_1
    277         SEL     a, a, dq1q0                     ;// per-lane |q1-q0|
    278         USUB8   a, a, beta
    279         SEL     filt, m00, filt                 ;// clear filt where |q1-q0| >= beta
    280 
    281         BEQ     bSLT4                           ;// bit 2 of bS clear -> bS<4 kernel; otherwise fall through to bSGE4
    282 ;//-------------------Filter--------------------
    283 bSGE4
    284         ;//---------bSGE4 Execution---------------
    285         CMP     filt, #0                        ;// Z consumed by BEQ NoFilterFilt0 below
    286 
    287         M_LDR   pThresholds, ppThresholds       ;// overwrites alpha in r6 (no longer needed for this pass)
    288 
    289         ;// Compute P0b = (2*p1 + p0 + q1 + 2)>>2 per lane, built from halving adds/subs
    290         UHADD8  t1, p_0, q_1                    ;// t1 = (p0 + q1)>>1
    291         BEQ     NoFilterFilt0                   ;// no lane passed the alpha/beta tests: skip stores
    292         MVN     t2, p_1                         ;// t2 = 255 - p1
    293         UHSUB8  t1, t1, t2                      ;// t1 = (t1 + p1 - 255)>>1
    294         USUB8   t2, filt, m01                   ;// result unused: sets GE in lanes where filt == 1, for the SELs below
    295         EOR     t1, t1, m01, LSL #7             ;// flip bit 7 of every lane (adds the 128 bias back)
    296 
    297         ADD     pThresholds,pThresholds, #4     ;// thresholds are not read on this path, but the pointer still advances by 4
    298 
    299         ;// Compute Q0b = (2*q1 + q0 + p1 + 2)>>2 per lane, same construction
    300         UHADD8  t2, q_0, p_1
    301         MVN     t3, q_1                         ;// t3 shares r14 with m00; m00 is dead from here on
    302         UHSUB8  t2, t2, t3
    303         M_STR   pThresholds, ppThresholds       ;// must be stored before P_0 (also r6) is written
    304         SEL     P_0, t1, p_0                    ;// filtered value where filt == 1, original p0 elsewhere
    305         EOR     t2, t2, m01, LSL #7
    306         SEL     Q_0, t2, q_0                    ;// filtered value where filt == 1, original q0 elsewhere
    307 
    308         B       StoreResultAndExit
    309 
    310 ;//---------- Exit of LoopX --------------
    311 ;//---- for the case of no filtering -----
    312 ;// Nothing is written to the image on this path; only pointers and the loop counter are advanced
    313 NoFilterFilt0                           ;// reached after the p rows were loaded (pQ0 is 2 bytes left of the q0 column)
    314         ADD     pQ0, pQ0, #2                    ;// undo the -2 applied before loading the p rows
    315 NoFilterBS0                             ;// reached with pQ0 = q0 column, 2 rows below the first row of this pass
    316         M_LDR   pThresholds, ppThresholds
    317         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to the first row of this pass
    318         ADD     pQ0, pQ0, #4                    ;// move 4 bytes right to the next edge
    319         ADD     pThresholds, pThresholds, #4    ;// keep pThresholds in step with the filtered paths
    320         ;// Load counter for LoopX
    321         M_LDRD  XY, pBS, pXYBS                  ;// pBS comes back as the value saved to ppBS (see NOTE(review) at the first pXYBS store)
    322         M_STR   pThresholds, ppThresholds       ;// store before r6 is reloaded as alpha
    323         M_LDRD  alpha, beta, pAlphaBeta1        ;// thresholds for the edge at +4 (ExitLoopY reloads pAlphaBeta0 if the row-group is done)
    324 
    325         ;// Align the pointer
    326         ADDS    XY, XY, XY                      ;// shift the counter left by one; C = bit shifted out, Z = counter exhausted
    327         M_STR   XY, pXYBS
    328         BCC     LoopY                           ;// carry clear: process the second edge (LoopY and LoopX are the same address)
    329         B       ExitLoopY                       ;// carry set: row-group finished; Z from ADDS is tested there
    330 
    331 bSLT4
    332         ;//---------bSLT4 Execution---------------
    333         M_LDR   pThresholds, ppThresholds       ;// overwrites alpha in r6 (no longer needed for this pass)
    334         CMP     filt, #0                        ;// Z consumed by BEQ NoFilterFilt0 below (USUB8 leaves NZCV alone)
    335 
    336 
    337         ;// Since beta <= 18 and alpha <= 255 we know
    338         ;// -254 <= p0-q0 <= 254
    339         ;//  -17 <= q1-q0 <= 17
    340         ;//  -17 <= p1-p0 <= 17
    341 
    342         ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
    343         ;//
    344         ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
    345         ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
    346         ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
    347 
    348         USUB8   t1, p_1, p_0                    ;// t1 = p1 - p0
    349         USUB8   t2, q_1, q_0                    ;// t2 = q1 - q0
    350         BEQ     NoFilterFilt0                   ;// no lane passed the alpha/beta tests: skip stores
    351 
    352         LDRB    tC0, [pThresholds], #1          ;// threshold for rows 0-1 (r5: p_1 is dead from here)
    353         SSUB8   t1, t1, t2                      ;// t1 = (p1-p0) - (q1-q0)
    354         LDRB    tC1, [pThresholds], #3          ;// threshold for rows 2-3; pointer has advanced 4 in total
    355         M_STR   pThresholds, ppThresholds       ;// store before r6 is reused as t4
    356         UHSUB8  t4, p_0, q_0                    ;// t4 = (p0-q0)>>1
    357         ORR     tC, tC1, tC0, LSL #16           ;// tC = [0 tC0 0 tC1]
    358         USUB8   t5, p_0, q_0
    359         AND     t5, t5, m01                     ;// t5 = low bit of (p0-q0) in each lane
    360         SHSUB8  t1, t1, t5
    361         ORR     tC, tC, LSL #8                  ;// tC = [tC0 tC0 tC1 tC1], one copy per row
    362         SSUB8   t1, t1, t5
    363         SHSUB8  t1, t1, t4
    364         UQADD8  tC, tC, m01                     ;// tC = tC + 1 (saturating) in every lane
    365         SADD8   t1, t1, m01
    366         USUB8   t5, filt, m01                   ;// result unused: sets GE in lanes where filt == 1
    367         SHSUB8  t1, t1, t4                      ;// t1 = A from the derivation above (halving ops do not disturb GE)
    368         SEL     tC, tC, m00                     ;// tC = 0 in lanes that are not filtered
    369 
    370         ;// Split into positive and negative part and clip
    371 
    372         SSUB8   t1, t1, m00                     ;// subtract 0: only sets GE in lanes where t1 >= 0 (signed)
    373         SEL     pos, t1, m00                    ;// pos = max(t1, 0)
    374         USUB8   neg, pos, t1                    ;// neg = max(-t1, 0)
    375         USUB8   t3, pos, tC                     ;// GE where pos >= tC (t3 is scratch; overwrites m00 in r14)
    376         SEL     pos, tC, pos                    ;// pos = min(pos, tC)
    377         USUB8   t3, neg, tC                     ;// GE where neg >= tC
    378         SEL     neg, tC, neg                    ;// neg = min(neg, tC)
    379         UQADD8  P_0, p_0, pos                   ;// P_0 = p0 + delta, done as +pos then -neg with unsigned saturation
    380         UQSUB8  Q_0, q_0, pos                   ;// Q_0 = q0 - delta, done as -pos then +neg with unsigned saturation
    381         UQSUB8  P_0, P_0, neg
    382         UQADD8  Q_0, Q_0, neg
    383 
    384         ;// Choose to store the filtered
    385         ;// value or the original pixel
    386         USUB8   t1, filt, m01                   ;// result unused: sets GE in lanes where filt == 1
    387         SEL     P_0, P_0, p_0
    388         SEL     Q_0, Q_0, q_0
    389 
    390 StoreResultAndExit
    391 ;// On entry pQ0 = (p1 column, first row of this pass) + 2 rows; P_0/Q_0 hold the four output bytes, row 0 in the top lane
    392         ;//---------Store result---------------
    393 
    394         ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
    395         ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
    396 
    397         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to the first row of this pass
    398         ADD        pQ0, pQ0, #1                 ;// pQ0 -> p0 (q0 is at pQ0 + 1)
    399 
    400         MOV     t1, Q_0, LSR #24
    401         STRB    t1, [pQ0, #1]                   ;// row 0, q0
    402         MOV     t1, P_0, LSR #24
    403         M_STRB  t1, [pQ0], srcdstStep           ;// row 0, p0; advance to row 1
    404 
    405         MOV     t1, Q_0, LSR #16
    406         STRB    t1, [pQ0, #1]                   ;// row 1, q0
    407         MOV     t1, P_0, LSR #16
    408         M_STRB  t1, [pQ0], srcdstStep           ;// row 1, p0; advance to row 2
    409 
    410         MOV     t1, P_0, LSR #8
    411         STRB    t1, [pQ0]                       ;// row 2, p0
    412         STRB    P_0, [pQ0, srcdstStep]          ;// row 3, p0 (STRB stores the low byte)
    413         MOV     t1, Q_0, LSR #8
    414         STRB    t1, [pQ0, #1]!                  ;// row 2, q0; writeback leaves pQ0 on the q0 column
    415         STRB    Q_0, [pQ0, srcdstStep]          ;// row 3, q0
    416 
    417         M_LDRD  XY, pBS, pXYBS                  ;// pBS comes back as the value saved to ppBS (see NOTE(review) at the first pXYBS store)
    418         M_LDRD  alpha, beta, pAlphaBeta1        ;// thresholds for the edge at +4; overwrites P_0/Q_0 (already stored)
    419 
    420         SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to the first row of this pass
    421         ADD     pQ0, pQ0, #4                    ;// move 4 bytes right to the next edge
    422 
    423         ADDS    XY, XY, XY                      ;// shift the counter left by one; C = bit shifted out, Z = counter exhausted
    424         M_STR   XY, pXYBS
    425         BCC     LoopX                           ;// carry clear: process the second edge; otherwise fall into ExitLoopY
    426 
    427 ;//-------- Common Exit of LoopY -----------------
    428         ;// Align the pointers
    429 ;// Reached with the flags of the last "ADDS XY, XY, XY" still live; nothing below sets flags before BNE (assuming the M_ macros do not)
    430 ExitLoopY
    431 
    432         M_LDR   pThresholds, ppThresholds
    433         SUB     pQ0, pQ0, #8                    ;// undo the two +4 edge steps
    434         ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// down 4 rows to the next row-group
    435         SUB     pBS, pBS, #14                   ;// two passes advanced pBS by 16: net +2 for the next row-group (kept in r9 only)
    436         SUB     pThresholds, pThresholds, #6    ;// two passes advanced pThresholds by 8: net +2 for the next row-group
    437         M_STR   pThresholds, ppThresholds
    438 
    439         M_LDRD  alpha, beta, pAlphaBeta0        ;// back to the thresholds for the edge at +0
    440 
    441         BNE     LoopY                           ;// counter not yet zero: process the second row-group
    442         MOV     r0, #OMX_Sts_NoErr              ;// return value
    443 ;//-----------------End Filter--------------------
    444 
    445         M_END
    446 
    447         ENDIF
    448 
    449         END
    450 
    451 
    452