Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13         INCLUDE omxtypes_s.h
     14         INCLUDE armCOMM_s.h
     15 
     16         M_VARIANTS ARM1136JS
     17 
     18         IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
     19         IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
     20 
     21 
     22 
     23     IF ARM1136JS
     24 
     25 
     26 MASK_0      EQU 0x00000000              ;// all-zero word; used as the "lane fails" value for SEL
     27 MASK_1      EQU 0x01010101              ;// 0x01 in every byte lane; also the multiplier that replicates a byte into all four lanes
     28 MASK_2      EQU 0xff00ff00              ;// not referenced in the visible code of this file
        ;// LOOP_COUNT is doubled (ADDS XY, XY, XY) once per LoopX pass.
        ;// A carry-out occurs on every 4th doubling (ends LoopX) and the value
        ;// becomes zero on the 16th doubling (ends LoopY), so one register
        ;// drives both the inner and the outer loop.
     29 LOOP_COUNT  EQU 0x11110000
     30 
     31 ;// Declare input registers
        ;// r0-r3 hold the first four arguments on entry.
     32 
     33 pSrcDst     RN 0
     34 srcdstStep  RN 1
     35 pAlphaArg   RN 2
     36 pBetaArg    RN 3
     37 
        ;// Working copies. Many names below alias the same physical register
        ;// (for example pQ0/apflg/apqflg/a/Q1b are all r0), so their live
        ;// ranges in the function body must never overlap.
     38 pThresholds RN 14
     39 pBS         RN 9
     40 pQ0         RN 0
     41 bS          RN 2
     42 
        ;// alpha/alpha0 and beta/beta0 are the same registers: the values for
        ;// index 0 are live in r6/r7 when the loop is first entered.
     43 alpha       RN 6
     44 alpha0      RN 6
     45 alpha1      RN 8
     46 
     47 beta        RN 7
     48 beta0       RN 7
     49 beta1       RN 9
     50 
     51 ;// Declare Local/Temporary variables
     52 
     53 ;// Pixels
        ;// Each register holds one 32-bit word loaded from a row (four bytes).
        ;// p_3 shares r2 with bS and q_3 shares r12 with the difference
        ;// temporaries, which is why the function spills both to the stack.
     54 p_0         RN 3
     55 p_1         RN 5
     56 p_2         RN 4
     57 p_3         RN 2
     58 q_0         RN 8
     59 q_1         RN 9
     60 q_2         RN 10
     61 q_3         RN 12
     62 
     63 ;// Filtering
     64 
        ;// All per-lane difference temporaries share r12; each is consumed by
        ;// the SEL that immediately follows its computation.
     65 dp0q0       RN 12
     66 dp1p0       RN 12
     67 dq1q0       RN 12
     68 dp2p0       RN 12
     69 dq2q0       RN 12
     70 
     71 ap0q0       RN 1
     72 filt        RN 2
     73 
     74 m00         RN 14
     75 m01         RN 11
     76 
     77 apflg       RN 0
     78 aqflg       RN 6
     79 apqflg      RN 0
     80 
     81 
     82 ;//Declarations for bSLT4 kernel
        ;// NOTE(review): presumably these mirror the register interface of
        ;// armVCM4P10_DeblockingLumabSLT4_unsafe; confirm against that routine.
     83 
     84 tC0         RN 7
     85 ptC0        RN 1
     86 
     87 pQ0a        RN 0
     88 Stepa       RN 1
     89 maska       RN 14
     90 
     91 P0a         RN 1
     92 P1a         RN 8
     93 Q0a         RN 7
     94 Q1a         RN 11
     95 
     96 ;//Declarations for bSGE4 kernel
        ;// NOTE(review): presumably these mirror the register interface of
        ;// armVCM4P10_DeblockingLumabSGE4_unsafe; confirm against that routine.
     97 
     98 pQ0b        RN 0
     99 Stepb       RN 1
    100 maskb       RN 14
    101 
    102 P0b         RN 6
    103 P1b         RN 7
    104 P2b         RN 1
    105 P3b         RN 3
    106 
    107 Q0b         RN 9
    108 Q1b         RN 0
    109 Q2b         RN 2
    110 Q3b         RN 3
    111 
    112 ;// Miscellaneous
        ;// XY is the combined LoopX/LoopY counter (see LOOP_COUNT).
        ;// t7 aliases beta (r7) and t2 aliases m00 (r14).
    113 XY          RN 8
    114 t0          RN 3
    115 t1          RN 12
    116 t2          RN 14
    117 t7          RN 7
    118 t4          RN 4
    119 t5          RN 1
    120 t8          RN 6
    121 a           RN 0
    122 
    123 
    124 
    125 
    126         ;// Allocate stack memory
                ;// Spill slots for values whose registers are reused inside the loop.
                ;// NOTE(review): the 8-byte M_STRD/M_LDRD accesses on pXYBS and on
                ;// ppQ0Step appear to rely on ppBS and pStep being placed directly
                ;// after them; confirm against the M_ALLOC macros in armCOMM_s.h.
    127         M_ALLOC4 ppThresholds,4
    128         M_ALLOC4 pQ_3,4
    129         M_ALLOC4 pP_3,4
    130         M_ALLOC8 pAlphaBeta0,8
    131         M_ALLOC8 pAlphaBeta1,8
    132         M_ALLOC8 pXYBS,4
    133         M_ALLOC4 ppBS,4
    134         M_ALLOC8 ppQ0Step,4
    135         M_ALLOC4 pStep,4
    136 
    137         ;// Function header
                ;//
                ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
                ;//
                ;// Filters luma pixels in place across horizontal edges (per the
                ;// function name). The loop runs 16 passes: four 4-byte column
                ;// groups along a row (LoopX) for each of four row positions spaced
                ;// 4*srcdstStep apart (LoopY). Each pass loads eight rows of four
                ;// bytes (p3..p0 above the edge, q0..q3 from the edge down), reads
                ;// one bS byte, builds per-lane filter masks with USUB8/SEL and
                ;// calls one of the two imported kernels.
                ;//
                ;// In:  r0 = pSrcDst     pointer to the first q0 row
                ;//      r1 = srcdstStep  byte distance between rows
                ;//      r2 = pAlphaArg   two alpha bytes: [0] for the first LoopY
                ;//                       pass, [1] for the remaining passes
                ;//      r3 = pBetaArg    two beta bytes, used like alpha
                ;//      stack: ppThresholdsArg, ppBSArg (one byte consumed per pass)
                ;// Out: r0 = OMX_Sts_NoErr (always)
                ;//
                ;// NOTE(review): M_START ..., r11 presumably saves r4-r11 and lr;
                ;// lr (r14) is used as a scratch register and by BL below. Confirm
                ;// in armCOMM_s.h.
    138         M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
    139 
    140         ;//Input arguments on the stack
    141         M_ARG   ppThresholdsArg, 4
    142         M_ARG   ppBSArg, 4
    143 
    144         LDR     t4,=MASK_1                  ;// 0x01010101: byte-replication multiplier
    145 
    146         LDRB    alpha0, [pAlphaArg]
    147         LDRB    beta0,  [pBetaArg]
    148         LDRB    alpha1, [pAlphaArg,#1]
    149         LDRB    beta1,  [pBetaArg,#1]
    150 
                ;// Replicate each threshold byte into all four byte lanes
    151         MUL     alpha0, alpha0, t4
    152         MUL     beta0, beta0, t4
    153         MUL     alpha1, alpha1, t4
    154         MUL     beta1, beta1, t4
    155 
    156         M_STRD  alpha0, beta0, pAlphaBeta0
    157         M_STRD  alpha1, beta1, pAlphaBeta1
    158 
                ;// alpha1/beta1 are spilled, so r8/r9 can now be reused as XY/pBS
    159         LDR     XY,=LOOP_COUNT
    160         M_LDR   pBS, ppBSArg
    161         M_LDR   pThresholds, ppThresholdsArg
    162         M_STR   srcdstStep, pStep
    163         M_STRD  XY, pBS, pXYBS
    164         SUB     pQ0, pQ0, srcdstStep, LSL #2    ;// back up four rows to the p3 row
    165         M_STR   pThresholds, ppThresholds
        ;// On entry to each pass: pQ0 = p3 row of the current column group,
        ;// r1 = srcdstStep, r9 = pBS, r6/r7 = replicated alpha/beta.
    166 LoopY
    167 LoopX
    168 ;//---------------Load Pixels-------------------
    169         M_STR   pQ0, ppQ0Step               ;// remember the p3-row pointer for the store phase
    170         M_LDR   p_3, [pQ0], srcdstStep
    171         M_LDR   p_2, [pQ0], srcdstStep
    172         M_STR   p_3, pP_3                   ;// p_3 shares r2 with bS, so spill it first
    173         LDRB    bS, [pBS], #1
    174         M_STR   pBS, ppBS
    175         M_LDR   p_1, [pQ0], srcdstStep
    176         CMP     bS, #0                      ;// flags consumed by BEQ NoFilterBS0 after the loads
    177         M_LDR   p_0, [pQ0], srcdstStep
    178         M_LDR   q_0, [pQ0], srcdstStep
    179         M_LDR   q_1, [pQ0], srcdstStep
    180         M_LDR   q_2, [pQ0], srcdstStep
    181         M_LDR   q_3, [pQ0], srcdstStep
    182         BEQ     NoFilterBS0
    183         CMP     bS, #4                      ;// flags consumed by BLT bSLT4 below; nothing in between writes N/Z/C/V
    184         M_STR   q_3, pQ_3                   ;// q_3 shares r12 with the difference temporaries
    185 
    186 ;//--------------Filtering Decision -------------------
                ;// Each test computes a per-byte absolute difference with two
                ;// USUB8s and a SEL, then compares it with the threshold using a
                ;// third USUB8 whose GE flags steer the final SEL.
    187         LDR     m01, =MASK_1                ;//  01010101 mask
    188         MOV     m00, #MASK_0                ;//  00000000 mask
    189 
    190         ;// Check |p0-q0|<Alpha
    191         USUB8   dp0q0, p_0, q_0
    192         USUB8   a, q_0, p_0
    193         SEL     ap0q0, a, dp0q0             ;// ap0q0 = |p0-q0| per lane (overwrites srcdstStep in r1)
    194         USUB8   a, ap0q0, alpha
    195         SEL     filt, m00, m01              ;// lane = 0x01 where |p0-q0| < alpha, else 0
    196 
    197         ;// Check |p1-p0|<Beta
    198         USUB8   dp1p0, p_1, p_0
    199         USUB8   a, p_0, p_1
    200         SEL     a, a, dp1p0
    201         USUB8   a, a, beta
    202         SEL     filt, m00, filt             ;// clear lanes where |p1-p0| >= beta
    203 
    204         ;// Check |q1-q0|<Beta
    205         USUB8   dq1q0, q_1, q_0
    206         USUB8   a, q_0, q_1
    207         SEL     a, a, dq1q0
    208         USUB8   a, a, beta
    209         SEL     filt, m00, filt             ;// clear lanes where |q1-q0| >= beta
    210 
    211         ;// Check ap<Beta
    212         USUB8   dp2p0, p_2, p_0
    213         USUB8   a, p_0, p_2
    214         SEL     a, a, dp2p0
    215         USUB8   a, a, beta
    216         SEL     apflg, m00, filt            ;// apflg = filt && (ap<beta)
    217 
    218         ;// Check aq<Beta
                ;// t2 is r14 (m00), which is no longer needed as the zero mask.
                ;// The GE flags from the last USUB8 stay live across the branch
                ;// and are consumed by the first SEL of bSGE4 / bSLT4.
    219         USUB8   dq2q0, q_2, q_0
    220         USUB8   t2, q_0, q_2
    221         SEL     t2, t2, dq2q0
    222         USUB8   t2, t2, beta
    223         MOV     t7,#0                       ;// r7 (beta) is dead now; reuse it as the zero operand
    224 
    225         BLT     bSLT4                       ;// bS < 4 (flags from CMP bS, #4)
    226 ;//-------------------Filter--------------------
    227 bSGE4
    228         ;//---------bSGE4 Execution---------------
    229         SEL     t1, t7, filt            ;// t1 = filt && (aq<beta)
    230         CMP     filt, #0
    231         ORR     apqflg, apflg, t1, LSL #1   ;// per lane: bit0 = ap flag, bit1 = aq flag
    232         M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// no lane to filter: restore r0/r1 before leaving
    233         BEQ     NoFilterFilt0
    234 
    235         BL      armVCM4P10_DeblockingLumabSGE4_unsafe
    236 
    237         ;//---------Store result---------------
                ;// Q1b (r0) and P2b (r1) are copied out first because r0/r1 are
                ;// about to be reloaded with the row pointer and the step.
    238         M_LDR   pThresholds, ppThresholds
    239         MOV     p_2, Q1b
    240         MOV     p_1, P2b
    241         M_LDRD  pQ0b, Stepb, ppQ0Step
    242         ADD     pThresholds, #1             ;// threshold byte unused here, but keep the pointer in step with pBS
    243         M_STR   pThresholds, ppThresholds
                ;// Pre-indexed stores with writeback walk rows 1..4 (p2, p1, p0, q0);
                ;// rows 5 and 6 (q1, q2) are written by offset. Rows 0 and 7 are
                ;// left untouched.
    244         M_STR   p_1, [pQ0b, Stepb]!
    245         M_STR   P1b, [pQ0b, Stepb]!
    246         M_STR   P0b, [pQ0b, Stepb]!
    247         M_STR   Q0b, [pQ0b, Stepb]!
    248         STR     p_2, [pQ0b, Stepb]
    249         STR     Q2b, [pQ0b, Stepb, LSL #1]
    250 
    251 
    252         M_LDRD  XY, pBS, pXYBS
    253         SUB     pQ0, pQ0b, Stepb, LSL #2    ;// rewind from row 4 to row 0
    254         ADD     pQ0, pQ0, #4                ;// advance to the next 4-byte column group
    255         M_LDRD  alpha, beta, pAlphaBeta0
    256         ADDS    XY, XY, XY                  ;// carry set on every 4th pass, zero on the 16th
    257         M_STR   XY, pXYBS
    258         BCC     LoopX
    259         B       ExitLoopY
    260 
    261 ;//---------- Exit of LoopX --------------
    262 ;//---- for the case of no filtering -----
    263 
    264 NoFilterBS0
    265         SUB     pQ0, pQ0, srcdstStep, LSL #3    ;// undo the eight post-incremented row loads
    266 NoFilterFilt0
    267         ADD     pQ0, pQ0, #4
    268         ;// Load counter for LoopX
    269         M_LDRD  XY, pBS, pXYBS
    270         M_LDR   pThresholds, ppThresholds
    271         M_LDRD  alpha, beta, pAlphaBeta0
    272 
    273         ;// Align the pointers
    274         ADDS    XY, XY, XY
    275         ADD     pThresholds, pThresholds, #1    ;// skip the threshold byte for this unfiltered group
    276         M_STR   pThresholds, ppThresholds
    277         M_STR   XY, pXYBS
    278         BCC     LoopX
    279         B       ExitLoopY
    280 
    281 bSLT4
    282         ;//---------bSLT4 Execution---------------
    283         SEL     aqflg, t7, filt            ;// aqflg = filt && (aq<beta)
    284         M_LDR   ptC0, ppThresholds
    285         CMP     filt, #0
    286         M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// no lane to filter: restore r0/r1 (overwrites ptC0)
    287         BEQ     NoFilterFilt0
    288 
    289         LDRB    tC0, [ptC0], #1             ;// consume one threshold byte
    290         M_STR   ptC0, ppThresholds
    291 
    292         BL      armVCM4P10_DeblockingLumabSLT4_unsafe
    293 
    294         ;//---------Store result---------------
    295         MOV     p_2, P0a                    ;// P0a is r1, which is reloaded as Stepa next
    296         M_LDRD  pQ0a, Stepa, ppQ0Step
                ;// Rows 2, 3, 4 (p1, p0, q0) are written with writeback, row 5 (q1) by offset
    297         M_STR   P1a, [pQ0a, Stepa, LSL #1]!
    298         M_STR   p_2, [pQ0a, Stepa]!
    299         M_STR   Q0a, [pQ0a, Stepa]!
    300         STR     Q1a, [pQ0a, Stepa]
    301 
    302         ;// Load counter
    303         M_LDRD  XY, pBS, pXYBS
    304         M_LDRD  alpha, beta, pAlphaBeta0
    305 
    306         SUB     pQ0, pQ0a, Stepa, LSL #2    ;// rewind from row 4 to row 0
    307         ADD     pQ0, pQ0, #4
    308 
    309         ADDS    XY, XY, XY
    310         M_STR   XY, pXYBS
    311         BCC     LoopX                       ;// otherwise fall through to ExitLoopY
    312 
    313 ;//-------- Common Exit of LoopY -----------------
    314         ;// Align the pointers
                ;// pQ0 is 16 bytes past the start of the row after four LoopX
                ;// passes; move back and step four rows down. From now on the
                ;// index-1 alpha/beta pair replaces the index-0 pair.
                ;// NOTE(review): BNE tests Z from the last ADDS XY; this assumes
                ;// M_LDRD/M_STRD expand to instructions that leave the flags alone.
    315 ExitLoopY
    316         M_LDRD  alpha, beta, pAlphaBeta1
    317         SUB     pQ0, pQ0, #16
    318         ADD     pQ0, pQ0, srcdstStep, LSL #2
    319         M_STRD  alpha, beta, pAlphaBeta0
    320 
    321         BNE     LoopY                       ;// XY reaches zero only after the 16th pass
    322         MOV     r0, #OMX_Sts_NoErr
    323 ;//-----------------End Filter--------------------
    324         M_END
    325 
    326     ENDIF
    327 
    328 
    329         END
    330 
    331