Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         M_VARIANTS ARM1136JS
     31 
     32         IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
     33         IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
     34 
     35 
     36 
     37     IF ARM1136JS
     38 
     39 
     40 MASK_0      EQU 0x00000000              ;// all-zero word (loaded into m00 and used as a SEL operand)
     41 MASK_1      EQU 0x01010101              ;// 0x01 in every byte lane; also the multiplier that replicates a byte into all 4 lanes
     42 MASK_2      EQU 0xff00ff00              ;// not referenced in this file
     43 LOOP_COUNT  EQU 0x11110000              ;// seed for XY: doubling it carries out on every 4th step and reaches zero on the 16th
     44 
     45 ;// Declare input registers (r0-r3 as named below)
     46 
     47 pSrcDst     RN 0
     48 srcdstStep  RN 1
     49 pAlphaArg   RN 2
     50 pBetaArg    RN 3
     51 
     52 pThresholds RN 14                       ;// r14 is used as an ordinary working register here
     53 pBS         RN 9                        ;// same register as beta1 / q_1 / Q0b
     54 pQ0         RN 0                        ;// working pixel pointer; same register as pSrcDst
     55 bS          RN 2                        ;// same register as pAlphaArg / p_3 / filt
     56 
     57 alpha       RN 6                        ;// alpha and alpha0 name the same register
     58 alpha0      RN 6
     59 alpha1      RN 8                        ;// same register as XY / q_0
     60 
     61 beta        RN 7                        ;// beta and beta0 name the same register
     62 beta0       RN 7
     63 beta1       RN 9                        ;// same register as pBS
     64 
     65 ;// Declare Local/Temporary variables
     66 
     67 ;// Pixels: one 32-bit word per row, loaded in the order p_3, p_2, p_1, p_0, q_0, q_1, q_2, q_3
     68 p_0         RN 3
     69 p_1         RN 5
     70 p_2         RN 4
     71 p_3         RN 2                        ;// shares r2 with bS; spilled to pP_3 before bS is loaded
     72 q_0         RN 8
     73 q_1         RN 9
     74 q_2         RN 10
     75 q_3         RN 12                       ;// shares r12 with the d* scratch names; spilled to pQ_3 first
     76 
     77 ;// Filtering
     78 
     79 dp0q0       RN 12                       ;// all five d* names are the same scratch register (r12)
     80 dp1p0       RN 12
     81 dq1q0       RN 12
     82 dp2p0       RN 12
     83 dq2q0       RN 12
     84 
     85 ap0q0       RN 1                        ;// overwrites srcdstStep (kept in the pStep stack slot)
     86 filt        RN 2                        ;// overwrites bS once bS has been compared
     87 
     88 m00         RN 14                       ;// overwrites pThresholds (kept in the ppThresholds stack slot)
     89 m01         RN 11
     90 
     91 apflg       RN 0                        ;// overwrites pQ0 (kept in the ppQ0Step stack slot)
     92 aqflg       RN 6                        ;// overwrites alpha (kept in pAlphaBeta0)
     93 apqflg      RN 0
     94 
     95 
     96 ;//Declarations for bSLT4 kernel
     97 
     98 tC0         RN 7
     99 ptC0        RN 1
    100 
    101 pQ0a        RN 0
    102 Stepa       RN 1
    103 maska       RN 14                       ;// not referenced in this file
    104 
    105 P0a         RN 1                        ;// read back after the bSLT4 kernel call
    106 P1a         RN 8                        ;// read back after the bSLT4 kernel call
    107 Q0a         RN 7                        ;// read back after the bSLT4 kernel call
    108 Q1a         RN 11                       ;// read back after the bSLT4 kernel call
    109 
    110 ;//Declarations for bSGE4 kernel
    111 
    112 pQ0b        RN 0
    113 Stepb       RN 1
    114 maskb       RN 14                       ;// not referenced in this file
    115 
    116 P0b         RN 6                        ;// read back after the bSGE4 kernel call
    117 P1b         RN 7                        ;// read back after the bSGE4 kernel call
    118 P2b         RN 1                        ;// read back after the bSGE4 kernel call (shares r1 with Stepb)
    119 P3b         RN 3                        ;// not referenced in this file
    120 
    121 Q0b         RN 9                        ;// read back after the bSGE4 kernel call
    122 Q1b         RN 0                        ;// read back after the bSGE4 kernel call (shares r0 with pQ0b)
    123 Q2b         RN 2                        ;// read back after the bSGE4 kernel call
    124 Q3b         RN 3                        ;// not referenced in this file
    125 
    126 ;// Miscellaneous
    127 XY          RN 8                        ;// loop counter seeded with LOOP_COUNT
    128 t0          RN 3                        ;// not referenced in this file
    129 t1          RN 12
    130 t2          RN 14
    131 t7          RN 7                        ;// same register as beta / tC0
    132 t4          RN 4
    133 t5          RN 1                        ;// not referenced in this file
    134 t8          RN 6                        ;// not referenced in this file
    135 a           RN 0                        ;// scratch; same register as pQ0 / apflg
    136 
    137 
    138 
    139 
    140         ;// Allocate stack memory (named local slots accessed through M_LDR/M_STR/M_LDRD/M_STRD)
    141         M_ALLOC4 ppThresholds,4             ;// saved pThresholds cursor
    142         M_ALLOC4 pQ_3,4                     ;// spilled q_3 word (not read back in this file)
    143         M_ALLOC4 pP_3,4                     ;// spilled p_3 word (not read back in this file)
    144         M_ALLOC8 pAlphaBeta0,8              ;// alpha/beta pair reloaded at the end of every iteration
    145         M_ALLOC8 pAlphaBeta1,8              ;// alpha/beta pair built from pAlphaArg[1] / pBetaArg[1]
    146         M_ALLOC8 pXYBS,4                    ;// loop counter XY; also accessed as a pair together with pBS
    147         M_ALLOC4 ppBS,4                     ;// saved pBS cursor
    148         M_ALLOC8 ppQ0Step,4                 ;// saved pQ0; also accessed as a pair together with srcdstStep
    149         M_ALLOC4 pStep,4                    ;// saved srcdstStep
                ;// NOTE(review): M_STRD/M_LDRD on pXYBS and M_LDRD on ppQ0Step transfer two
                ;// words although each slot is declared with 4 bytes; this appears to rely on
                ;// ppBS / pStep being laid out directly behind them - confirm in armCOMM_s.h.
    150 
    151         ;// Function header
                ;//
                ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
                ;//
                ;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlphaArg, r3 = pBetaArg
                ;// Stack arguments:    ppThresholdsArg, ppBSArg
                ;// Returns:            r0 = OMX_Sts_NoErr (no argument checking is performed here)
                ;//
                ;// What the code below does:
                ;//  * Replicates the bytes pAlphaArg[0..1] and pBetaArg[0..1] into all four
                ;//    byte lanes of a word each.
                ;//  * Starts at pSrcDst - 4*srcdstStep and runs 16 iterations. Each iteration
                ;//    loads one word from each of 8 consecutive rows (p_3..p_0, q_0..q_3),
                ;//    consumes one bS byte and advances the threshold pointer by one byte.
                ;//  * bS == 0, or an all-zero filter mask, leaves the pixels untouched.
                ;//    Otherwise bS < 4 calls armVCM4P10_DeblockingLumabSLT4_unsafe and
                ;//    bS >= 4 calls armVCM4P10_DeblockingLumabSGE4_unsafe, and the returned
                ;//    rows are stored back.
                ;//  * After every iteration the pointer moves 4 bytes to the right; after
                ;//    every 4th it moves back 16 bytes and down 4 rows.
                ;//  * alpha[0]/beta[0] are used for the first group of 4 iterations,
                ;//    alpha[1]/beta[1] for all later groups.
                ;//
                ;// NOTE(review): the second M_START operand (r11) is presumably the highest
                ;// register the macro preserves - confirm in armCOMM_s.h.
    152         M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
    153 
    154         ;//Input arguments on the stack
    155         M_ARG   ppThresholdsArg, 4
    156         M_ARG   ppBSArg, 4
    157 
    158         LDR     t4,=MASK_1                  ;// 0x01010101: byte-replication multiplier
    159 
    160         LDRB    alpha0, [pAlphaArg]         ;// alpha0 = pAlphaArg[0]
    161         LDRB    beta0,  [pBetaArg]          ;// beta0  = pBetaArg[0]
    162         LDRB    alpha1, [pAlphaArg,#1]      ;// alpha1 = pAlphaArg[1]
    163         LDRB    beta1,  [pBetaArg,#1]       ;// beta1  = pBetaArg[1]
    164 
    165         MUL     alpha0, alpha0, t4          ;// copy each byte value into all four lanes
    166         MUL     beta0, beta0, t4
    167         MUL     alpha1, alpha1, t4
    168         MUL     beta1, beta1, t4
    169 
    170         M_STRD  alpha0, beta0, pAlphaBeta0
    171         M_STRD  alpha1, beta1, pAlphaBeta1  ;// r8/r9 are free for XY/pBS after this store
    172 
    173         LDR     XY,=LOOP_COUNT
    174         M_LDR   pBS, ppBSArg
    175         M_LDR   pThresholds, ppThresholdsArg
    176         M_STR   srcdstStep, pStep
    177         M_STRD  XY, pBS, pXYBS
    178         SUB     pQ0, pQ0, srcdstStep, LSL #2    ;// pQ0 = pSrcDst - 4*srcdstStep (row of p_3)
    179         M_STR   pThresholds, ppThresholds
    180 LoopY                                       ;// LoopY and LoopX label the same address
    181 LoopX
    182 ;//---------------Load Pixels-------------------
    183         M_STR   pQ0, ppQ0Step               ;// remember the address of row 0 (p_3) for the store-back
    184         M_LDR   p_3, [pQ0], srcdstStep      ;// each load post-increments pQ0 by one row
    185         M_LDR   p_2, [pQ0], srcdstStep
    186         M_STR   p_3, pP_3                   ;// spill p_3: r2 is about to receive bS
    187         LDRB    bS, [pBS], #1               ;// next bS byte, pBS advances by one
    188         M_STR   pBS, ppBS
    189         M_LDR   p_1, [pQ0], srcdstStep
    190         CMP     bS, #0                      ;// Z is consumed by BEQ NoFilterBS0 below; relies on the loads in between leaving the flags alone
    191         M_LDR   p_0, [pQ0], srcdstStep
    192         M_LDR   q_0, [pQ0], srcdstStep
    193         M_LDR   q_1, [pQ0], srcdstStep
    194         M_LDR   q_2, [pQ0], srcdstStep
    195         M_LDR   q_3, [pQ0], srcdstStep      ;// pQ0 now points 8 rows below row 0
    196         BEQ     NoFilterBS0                 ;// bS == 0: nothing to do for this column
    197         CMP     bS, #4                      ;// flags are consumed by BLT bSLT4 after the decision code
    198         M_STR   q_3, pQ_3                   ;// spill q_3: r12 is reused as difference scratch
    199 
    200 ;//--------------Filtering Decision -------------------
                ;// Pattern used for every test: two USUB8 give x-y and y-x, SEL keeps the
                ;// non-negative one (= |x-y| per byte), a third USUB8 against the threshold
                ;// sets GE per byte where |x-y| >= threshold, and SEL clears those bytes.
    201         LDR     m01, =MASK_1                ;//  01010101 mask
    202         MOV     m00, #MASK_0                ;//  00000000 mask
    203 
    204         ;// Check |p0-q0|<Alpha
    205         USUB8   dp0q0, p_0, q_0             ;// p0-q0 per byte
    206         USUB8   a, q_0, p_0                 ;// q0-p0 per byte; GE set where q0 >= p0
    207         SEL     ap0q0, a, dp0q0             ;// |p0-q0| per byte (r1: srcdstStep is overwritten)
    208         USUB8   a, ap0q0, alpha             ;// GE set where |p0-q0| >= alpha
    209         SEL     filt, m00, m01              ;// filt byte = 0x00 where GE set, else 0x01
    210 
    211         ;// Check |p1-p0|<Beta
    212         USUB8   dp1p0, p_1, p_0
    213         USUB8   a, p_0, p_1
    214         SEL     a, a, dp1p0                 ;// |p1-p0| per byte
    215         USUB8   a, a, beta                  ;// GE set where |p1-p0| >= beta
    216         SEL     filt, m00, filt             ;// clear filt bytes that fail the test
    217 
    218         ;// Check |q1-q0|<Beta
    219         USUB8   dq1q0, q_1, q_0
    220         USUB8   a, q_0, q_1
    221         SEL     a, a, dq1q0                 ;// |q1-q0| per byte
    222         USUB8   a, a, beta                  ;// GE set where |q1-q0| >= beta
    223         SEL     filt, m00, filt             ;// clear filt bytes that fail the test
    224 
    225         ;// Check ap<Beta
    226         USUB8   dp2p0, p_2, p_0
    227         USUB8   a, p_0, p_2
    228         SEL     a, a, dp2p0                 ;// ap = |p2-p0| per byte
    229         USUB8   a, a, beta                  ;// GE set where ap >= beta
    230         SEL     apflg, m00, filt            ;// apflg = filt && (ap<beta)
    231 
    232         ;// Check aq<Beta
    233         USUB8   dq2q0, q_2, q_0
    234         USUB8   t2, q_0, q_2
    235         SEL     t2, t2, dq2q0               ;// aq = |q2-q0| per byte (r14: m00 is overwritten)
    236         USUB8   t2, t2, beta                ;// GE set where aq >= beta; consumed by the SEL at bSGE4 / bSLT4
    237         MOV     t7,#0                       ;// r7 (beta) becomes the zero operand for that SEL
    238 
    239         BLT     bSLT4                       ;// bS < 4, using the flags of CMP bS, #4 above
    240 ;//-------------------Filter--------------------
    241 bSGE4
    242         ;//---------bSGE4 Execution---------------
    243         SEL     t1, t7, filt            ;// t1 = filt && (aq<beta), i.e. the aq flag per byte
    244         CMP     filt, #0                    ;// any byte lane left to filter?
    245         ORR     apqflg, apflg, t1, LSL #1   ;// per byte: bit 0 = ap flag, bit 1 = aq flag
    246         M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// skip path only: restore row-0 pointer and step
    247         BEQ     NoFilterFilt0
    248 
                ;// NOTE(review): the register interface of the kernel is defined in its own
                ;// file; the results are picked up below through the P*b / Q*b aliases.
    249         BL      armVCM4P10_DeblockingLumabSGE4_unsafe
    250 
    251         ;//---------Store result---------------
    252         M_LDR   pThresholds, ppThresholds
    253         MOV     p_2, Q1b                    ;// move Q1b out of r0 before pQ0b is reloaded
    254         MOV     p_1, P2b                    ;// move P2b out of r1 before Stepb is reloaded
    255         M_LDRD  pQ0b, Stepb, ppQ0Step       ;// pQ0b = row 0 (p_3 row), Stepb = srcdstStep
    256         ADD     pThresholds, #1             ;// step over this column's threshold byte
    257         M_STR   pThresholds, ppThresholds
    258         M_STR   p_1, [pQ0b, Stepb]!         ;// row 1 (p_2 row) <- P2b, with writeback
    259         M_STR   P1b, [pQ0b, Stepb]!         ;// row 2 (p_1 row) <- P1b
    260         M_STR   P0b, [pQ0b, Stepb]!         ;// row 3 (p_0 row) <- P0b
    261         M_STR   Q0b, [pQ0b, Stepb]!         ;// row 4 (q_0 row) <- Q0b
    262         STR     p_2, [pQ0b, Stepb]          ;// row 5 (q_1 row) <- Q1b, no writeback
    263         STR     Q2b, [pQ0b, Stepb, LSL #1]  ;// row 6 (q_2 row) <- Q2b, no writeback
    264 
    265 
    266         M_LDRD  XY, pBS, pXYBS
    267         SUB     pQ0, pQ0b, Stepb, LSL #2    ;// pQ0b is at row 4: go back to row 0
    268         ADD     pQ0, pQ0, #4                ;// next 4-byte column
    269         M_LDRD  alpha, beta, pAlphaBeta0    ;// r6/r7 were reused above, reload them
    270         ADDS    XY, XY, XY                  ;// shift the counter left: C ends LoopX, Z ends LoopY
    271         M_STR   XY, pXYBS
    272         BCC     LoopX
    273         B       ExitLoopY
    274 
    275 ;//---------- Exit of LoopX --------------
    276 ;//---- for the case of no filtering -----
    277 
    278 NoFilterBS0
    279         SUB     pQ0, pQ0, srcdstStep, LSL #3    ;// undo the eight post-incremented row loads
    280 NoFilterFilt0
    281         ADD     pQ0, pQ0, #4                ;// next 4-byte column
    282         ;// Load counter for LoopX
    283         M_LDRD  XY, pBS, pXYBS
    284         M_LDR   pThresholds, ppThresholds
    285         M_LDRD  alpha, beta, pAlphaBeta0
    286 
    287         ;// Advance the loop counter and the threshold pointer
    288         ADDS    XY, XY, XY                  ;// C ends LoopX, Z ends LoopY
    289         ADD     pThresholds, pThresholds, #1    ;// no S suffix: the flags of ADDS stay valid
    290         M_STR   pThresholds, ppThresholds
    291         M_STR   XY, pXYBS
    292         BCC     LoopX
    293         B       ExitLoopY
    294 
    295 bSLT4
    296         ;//---------bSLT4 Execution---------------
    297         SEL     aqflg, t7, filt            ;// aqflg = filt && (aq<beta)
    298         M_LDR   ptC0, ppThresholds
    299         CMP     filt, #0                    ;// any byte lane left to filter?
    300         M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// skip path only: restore row-0 pointer and step
    301         BEQ     NoFilterFilt0
    302 
    303         LDRB    tC0, [ptC0], #1             ;// threshold byte for this column, pointer advances by one
    304         M_STR   ptC0, ppThresholds
    305 
                ;// NOTE(review): the register interface of the kernel is defined in its own
                ;// file; the results are picked up below through the P*a / Q*a aliases.
    306         BL      armVCM4P10_DeblockingLumabSLT4_unsafe
    307 
    308         ;//---------Store result---------------
    309         MOV     p_2, P0a                    ;// move P0a out of r1 before Stepa is reloaded
    310         M_LDRD  pQ0a, Stepa, ppQ0Step       ;// pQ0a = row 0 (p_3 row), Stepa = srcdstStep
    311         M_STR   P1a, [pQ0a, Stepa, LSL #1]! ;// row 2 (p_1 row) <- P1a, with writeback
    312         M_STR   p_2, [pQ0a, Stepa]!         ;// row 3 (p_0 row) <- P0a
    313         M_STR   Q0a, [pQ0a, Stepa]!         ;// row 4 (q_0 row) <- Q0a
    314         STR     Q1a, [pQ0a, Stepa]          ;// row 5 (q_1 row) <- Q1a, no writeback
    315 
    316         ;// Load counter
    317         M_LDRD  XY, pBS, pXYBS
    318         M_LDRD  alpha, beta, pAlphaBeta0
    319 
    320         SUB     pQ0, pQ0a, Stepa, LSL #2    ;// pQ0a is at row 4: go back to row 0
    321         ADD     pQ0, pQ0, #4                ;// next 4-byte column
    322 
    323         ADDS    XY, XY, XY                  ;// C ends LoopX, Z ends LoopY
    324         M_STR   XY, pXYBS
    325         BCC     LoopX                       ;// on carry, fall through into ExitLoopY
    326 
    327 ;//-------- Common Exit of LoopY -----------------
    328         ;// Move to the next group of rows and switch to the second alpha/beta pair
    329 ExitLoopY
    330         M_LDRD  alpha, beta, pAlphaBeta1
    331         SUB     pQ0, pQ0, #16               ;// undo the four 4-byte column steps
    332         ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// move down four rows
    333         M_STRD  alpha, beta, pAlphaBeta0    ;// later reloads from pAlphaBeta0 now yield alpha1/beta1
    334 
    335         BNE     LoopY                       ;// Z comes from the last ADDS XY; nothing since then may change the flags
    336         MOV     r0, #OMX_Sts_NoErr
    337 ;//-----------------End Filter--------------------
    338         M_END
    339 
    340     ENDIF
    341 
    342 
    343         END
    344 
    345 
    346