Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13         INCLUDE omxtypes_s.h
     14         INCLUDE armCOMM_s.h
     15 
     16         M_VARIANTS ARM1136JS
     17 
     18         EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     19 
     20 DEBUG_ON    SETL {FALSE}
     21 
     22 
     23     IF ARM1136JS
     24 
     25 ;// Function:
     26 ;//     armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     27 ;//
     28 ;// Implements horizontal interpolation for a block of size 4x4. Input and output should
     29 ;// be aligned.
     30 ;//
     31 ;// Registers used as input for this function
     32 ;// r0,r1,r2,r3 where r0 is the source pointer, r2 is the destination pointer and r1,r3 are the corresponding step sizes
     33 ;//
     34 ;// Registers preserved for top level function
     35 ;// r0,r1,r2,r3,r4,r5,r6,r14
     36 ;//
     37 ;// Registers modified by the function
     38 ;// r7,r8,r9,r10,r11,r12
     39 ;//
     40 ;// Output registers
     41 ;// None. Function will preserve r0-r3
     42 
     43 
     44 ;// Declare input registers
     45 pSrc            RN 0                                    ;// Source pointer: base address of every row load
     46 srcStep         RN 1                                    ;// Byte offset from one source row to the next
     47 pDst            RN 2                                    ;// Destination pointer: base address of the result stores
     48 dstStep         RN 3                                    ;// Byte offset added to pDst after each stored row
     49 
     50 ;// Declare inner loop registers
        ;// Several names below share one physical register (r1, r3, r5, r7 and r12 each
        ;// carry more than one name). A name is only meaningful while the other names on
        ;// the same register are dead, so instruction order in the loop matters.
        ;// AccN is first written by an instruction that reads ValA/ValB/ValC/ValD for the
        ;// last time, which is why Acc0-Acc3 can reuse r4-r7.
     51 Acc0            RN 4                                    ;// Packed filter sums for output column 0 (two rows)
     52 Acc1            RN 5                                    ;// Packed filter sums for output column 1 (two rows)
     53 Acc2            RN 6                                    ;// Packed filter sums for output column 2 (two rows)
     54 Acc3            RN 7                                    ;// Packed filter sums for output column 3 (two rows)
     55 
        ;// a..i are nine consecutive source bytes of a row; the digit in the layout
        ;// comments is the row within the current pair (0 = low halfword, 1 = high halfword).
     56 ValA            RN 4                                    ;// Row-0 word [d0 c0 b0 a0], later lanes [a1 a0] + 255
     57 ValB            RN 5                                    ;// Byte pairs [b1 a1 b0 a0]
     58 ValC            RN 6                                    ;// Lanes [c1 c0] + 255
     59 ValD            RN 7                                    ;// Row-1 word [d1 c1 b1 a1], later byte pairs [d1 c1 d0 c0]
     60 ValE            RN 8                                    ;// Row-0 word [h0 g0 f0 e0], later lanes [e1 e0] + 255
     61 ValF            RN 9                                    ;// Byte pairs [f1 e1 f0 e0]
     62 ValG            RN 12                                   ;// Lanes [g1 g0] + 255 (shares r12 with Temp3)
     63 ValH            RN 14                                   ;// Row-1 word [h1 g1 f1 e1], later byte pairs [h1 g1 h0 g0] (r14 is lr)
     64 ValI            RN 1                                    ;// Lanes [i1 i0], later + 255 (shares r1 with srcStep and Temp2)
     65 
     66 Temp1           RN 3                                    ;// Scratch; shares r3 with dstStep and r0x0fe00fe0
     67 Temp2           RN 1                                    ;// Scratch; shares r1 with srcStep and ValI
     68 Temp3           RN 12                                   ;// Scratch; shares r12 with ValG
     69 Temp4           RN 7                                    ;// Not referenced by the code in this file section
     70 Temp5           RN 5                                    ;// Scratch; shares r5 with ValB and Acc1
     71 r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]
     72 r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
     73 Counter         RN 11                                   ;// Loop counter: number of row pairs still to process
     74 
     75 Height          RN 3                                    ;// Not referenced by the code in this file section
     76 
        ;// Stack slots used to spill dstStep and srcStep while r3 and r1 are reused as
        ;// scratch; they are accessed by name with M_STR / M_LDR.
        ;// NOTE(review): M_ALLOC4 is a macro from outside this file (presumably armCOMM_s.h);
        ;// the exact meaning of its second argument is not visible here - confirm.
     77         M_ALLOC4 pDstStep, 4
     78         M_ALLOC4 pSrcStep, 4
     79 
     80         ;// Function header
                ;// Writes a 4x4 block of horizontally filtered pixels. Output column j of a row is
                ;// the 6-tap sum (1,-5,20,20,-5,1) of source bytes j..j+5 relative to pSrc, plus 16,
                ;// shifted right by 5 and clamped to 0..255.
                ;// Two rows are processed per loop pass as packed 16-bit lanes: row 0 of the pair
                ;// in the low halfword, row 1 in the high halfword.
                ;// In the layout comments a..i are source bytes and p,q,r,s are output columns 0..3;
                ;// the trailing digit is the row within the pair.
                ;// pSrc, srcStep, pDst and dstStep hold their entry values again at M_END.
                ;// NOTE(review): M_START/M_END/M_STR/M_LDR are macros defined outside this file;
                ;// the "r6" argument presumably makes the prologue save r4-r6 and lr - confirm.
     81         M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6
     82 
     83         MOV     Counter, #2                             ;// Two loop passes of two rows each = 4 rows
     84         M_STR   dstStep, pDstStep                       ;// Spill dstStep: r3 is reused as Temp1 and r0x0fe00fe0
     85         M_STR   srcStep, pSrcStep                       ;// Spill srcStep: r1 is reused as Temp2 and ValI
     86         LDR     r0x00ff00ff, =0x00ff00ff               ;// [0 255 0 255] 255 is offset to avoid negative results
     87 
     88 NextTwoRowsLoop
     89         LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
     90         LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0], then pSrc += 4
     91         LDR     ValH, [pSrc, srcStep]                   ;// Load row 1 [h1 g1 f1 e1]
     92         LDR     ValE, [pSrc], #4                        ;// Load row 0 [h0 g0 f0 e0], then pSrc += 4
     93         LDRB    Temp2, [pSrc, srcStep]                  ;// Load row 1 byte i1 only [00 00 00 i1]; overwrites srcStep (r1)
     94         LDRB    Temp1, [pSrc], #-8                      ;// Load row 0 byte i0 only [00 00 00 i0]; pSrc back to row start; overwrites dstStep (r3)
     95 
     96         PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
     97         PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
     98         UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
     99         UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
    100         PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
    101         PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
    102         PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
    103         UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]
    104 
    105         ;// Calculate Acc0
    106         ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
                ;// Every pairwise sum below contains exactly one operand that already carries
                ;// the +255 offset, so each finished accumulator is biased by 16*255 per lane.
    107         UXTAB16 Temp1, ValC, ValD, ROR #8               ;// c + d (+255); ROR #8 selects bytes 1 and 3 of ValD
    108         UXTAB16 Temp3, ValE, ValB, ROR #8               ;// e + b (+255)
    109         RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(c + d) - (b + e), bias 3*255 keeps each lane non-negative
    110         UXTAB16 Acc0, ValA, ValF, ROR #8                ;// a + f (+255); Acc0 takes over r4 from ValA
    111         ADD     Temp1, Temp1, Temp1, LSL #2             ;// x5: 20*(c + d) - 5*(b + e), bias 15*255
    112         ADD     Acc0, Acc0, Temp1                       ;// Acc0 = filter sum + 16*255 in each lane
    113 
    114         ;// Calculate Acc1
    115         ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
    116         UXTAB16 Temp1, ValE, ValD, ROR #8               ;// e + d (+255)
    117         UXTAB16 Temp3, ValC, ValF, ROR #8               ;// c + f (+255)
    118         RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(d + e) - (c + f); Temp3 (r12) is free after this
    119         UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
    120         ADD     Temp1, Temp1, Temp1, LSL #2             ;// x5: 20*(d + e) - 5*(c + f)
    121         UXTAB16 Acc1, ValG, ValB, ROR #8                ;// g + b (+255); Acc1 takes over r5 from ValB
    122         ADD     Acc1, Acc1, Temp1                       ;// Acc1 = filter sum + 16*255 in each lane
    123 
    124         LDR     r0x0fe00fe0, =0x0fe00fe0                ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
    125         UXTAB16 Acc2, ValC, ValH, ROR #8                ;// c + h (+255), first term of Acc2; Acc2 takes over r6 from ValC
    126         ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]
    127         UQSUB16 Acc0, Acc0, r0x0fe00fe0                 ;// Remove the bias but keep +16 for rounding; results below 0 saturate to 0
    128         UQSUB16 Acc1, Acc1, r0x0fe00fe0                 ;// Same for Acc1
    129         USAT16  Acc0, #13, Acc0                         ;// Clamp each lane to 0..8191 so that >>5 yields at most 255
    130         USAT16  Acc1, #13, Acc1                         ;// Same for Acc1
    131 
    132         ;// Calculate Acc2
    133         ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
    134         UXTAB16 Temp1, ValG, ValD, ROR #8               ;// g + d (+255); overwrites r0x0fe00fe0 (r3), reloaded below
    135         UXTAB16 Acc3, ValI, ValD, ROR #8                ;// i + d (+255), first term of Acc3; Acc3 takes over r7 from ValD
    136         UXTAB16 Temp2, ValE, ValF, ROR #8               ;// e + f (+255); Temp2 takes over r1 from ValI
    137         AND     Acc1, r0x00ff00ff, Acc1, LSR #5         ;// [00 q1 00 q0] = lanes >> 5, one byte per lane
    138         AND     Acc0, r0x00ff00ff, Acc0, LSR #5         ;// [00 p1 00 p0]
    139         ORR     Acc0, Acc0, Acc1, LSL #8                ;// [q1 p1 q0 p0]
    140         RSB     Temp5, Temp1, Temp2, LSL #2             ;// 4*(e + f) - (d + g); Temp5 reuses r5 now that Acc1 is merged
    141         UXTAB16 Temp2, ValG, ValF, ROR #8               ;// g + f (+255), prepared for Acc3
    142         ADD     Temp5, Temp5, Temp5, LSL #2             ;// x5: 20*(e + f) - 5*(d + g)
    143         ADD     Acc2, Acc2, Temp5                       ;// Acc2 = filter sum + 16*255 in each lane
    144 
    145         ;// Calculate Acc3
    146         ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
    147         UXTAB16 Temp5, ValE, ValH, ROR #8               ;// e + h (+255)
    148         RSB     Temp5, Temp5, Temp2, LSL #2             ;// 4*(f + g) - (e + h)
    149         LDR     r0x0fe00fe0, =0x0fe00fe0                ;// Reload: r3 was used as Temp1 since the first load
    150         ADD     Temp5, Temp5, Temp5, LSL #2             ;// x5: 20*(f + g) - 5*(e + h)
    151         ADD     Acc3, Acc3, Temp5                       ;// Acc3 = filter sum + 16*255 in each lane
    152 
    153         UQSUB16 Acc3, Acc3, r0x0fe00fe0                 ;// Remove the bias, keep +16; results below 0 saturate to 0
    154         UQSUB16 Acc2, Acc2, r0x0fe00fe0                 ;// Same for Acc2
    155         USAT16  Acc3, #13, Acc3                         ;// Clamp each lane to 0..8191
    156         USAT16  Acc2, #13, Acc2                         ;// Same for Acc2
    157 
    158         M_LDR   dstStep, pDstStep                       ;// Restore dstStep; r3 is no longer needed as a constant
    159         AND     Acc3, r0x00ff00ff, Acc3, LSR #5         ;// [00 s1 00 s0]
    160         AND     Acc2, r0x00ff00ff, Acc2, LSR #5         ;// [00 r1 00 r0]
    161         ORR     Acc2, Acc2, Acc3, LSL #8                ;// [s1 r1 s0 r0]
    162 
    163         SUBS    Counter, Counter, #1                    ;// Sets the flags for BGT below (NOTE(review): assumes M_LDR/M_STR leave flags alone)
    164         M_LDR   srcStep, pSrcStep                       ;// Restore srcStep; r1 is no longer needed as scratch
    165         PKHBT   Acc1, Acc0, Acc2, LSL #16               ;// Row 0 result [s0 r0 q0 p0]
    166         M_STR   Acc1, [pDst], dstStep                   ;// Store result1
    167         PKHTB   Acc2, Acc2, Acc0, ASR #16               ;// Row 1 result [s1 r1 q1 p1]
    168         M_STR   Acc2, [pDst], dstStep                   ;// Store result2
    169         ADD     pSrc, pSrc, srcStep, LSL #1             ;// Advance to the next pair of source rows
    170 
    171         BGT     NextTwoRowsLoop                         ;// Repeat while Counter > 0
    172 End
    173         SUB     pDst, pDst, dstStep, LSL #2             ;// Undo the four dstStep advances: pDst back to its entry value
    174         SUB     pSrc, pSrc, srcStep, LSL #2             ;// Undo the two 2*srcStep advances: pSrc back to its entry value
    175 
    176         M_END
    177 
    178     ENDIF
    179 
    180     END
    181 
    182 
    183 
    184 
    185 
    186 
    187 
    188 
    189 
    190 
    191 
    192 
    193 
    194 
    195 
    196 
    197 
    198 
    199 
    200 
    201 
    202 
    203 
    204 
    205 
    206 
    207 
    208 
    209 
    210 
    211 
    212 
    213 
    214 
    215 
    216 
    217 
    218 
    219 
    220 
    221 
    222 
    223 
    224 
    225 
    226 
    227 
    228 
    229 
    230 
    231 
    232 
    233 
    234 
    235 
    236 
    237 
    238 
    239 
    240