Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         M_VARIANTS ARM1136JS
     31 
     32         EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     33 
     34 DEBUG_ON    SETL {FALSE}
     35 
     36 
     37     IF ARM1136JS
     38 
     39 ;// Function:
     40 ;//     armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     41 ;//
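     42 ;// Implements horizontal half-sample interpolation (6-tap filter 1,-5,20,20,-5,1 with
     43 ;// rounding, divided by 32) for a block of size 4x4. Input and output should be aligned
     43 ;// (source rows are read with word loads).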
     44 ;//
     45 ;// Registers used as input for this function
     46 ;// r0,r1,r2,r3 where r0 = source pointer, r2 = destination pointer and r1,r3 the corresponding step sizes
     47 ;//
     48 ;// Registers preserved for top level function
     49 ;// r0,r1,r2,r3,r4,r5,r6,r14
     50 ;//
     51 ;// Registers modified by the function
     52 ;// r7,r8,r9,r10,r11,r12
     53 ;//
     54 ;// Output registers
     55 ;// None. Function will preserve r0-r3
     56 
     57 
     58 ;// Declare input registers
     59 pSrc            RN 0                                    ;// source pointer (base of the row loads)
     60 srcStep         RN 1                                    ;// source row stride (register offset to the next row)
     61 pDst            RN 2                                    ;// destination pointer (base of the result stores)
     62 dstStep         RN 3                                    ;// destination row stride (post-index increment of the stores)
     63 
     64 ;// Declare inner loop registers
        ;// NOTE: several names below alias the same physical register. An alias may
        ;// only be written once the value previously held in that register is dead.
        ;// Acc0..Acc3 are the packed filter sums for output columns 0..3 and share
        ;// r4..r7 with ValA..ValD.
     65 Acc0            RN 4
     66 Acc1            RN 5
     67 Acc2            RN 6
     68 Acc3            RN 7
     69 
        ;// ValB/ValD/ValF/ValH hold packed bytes of both rows ([x1 w1 x0 w0]);
        ;// ValA/ValC/ValE/ValG/ValI hold one biased sample per 16-bit lane.
     70 ValA            RN 4
     71 ValB            RN 5
     72 ValC            RN 6
     73 ValD            RN 7
     74 ValE            RN 8
     75 ValF            RN 9
     76 ValG            RN 12
     77 ValH            RN 14                                   ;// r14 (lr); NOTE(review): presumably saved/restored by M_START/M_END - confirm
     78 ValI            RN 1                                    ;// shares r1 with srcStep and Temp2
     79 
     80 Temp1           RN 3                                    ;// shares r3 with dstStep, r0x0fe00fe0 and Height
     81 Temp2           RN 1                                    ;// shares r1 with srcStep and ValI
     82 Temp3           RN 12                                   ;// shares r12 with ValG
     83 Temp4           RN 7                                    ;// not referenced by the code in this file
     84 Temp5           RN 5                                    ;// shares r5 with ValB and Acc1
     85 r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]
     86 r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
     87 Counter         RN 11                                   ;// loop pass counter (two rows are produced per pass)
     88 
     89 Height          RN 3                                    ;// not referenced by the code in this file
     90 
        ;// Locals used to save dstStep (r3) and srcStep (r1) while those registers are
        ;// reused as temporaries. NOTE(review): M_ALLOC4 is defined outside this file
        ;// (armCOMM_s.h); presumably it reserves a 4-byte local - confirm.
     91         M_ALLOC4 pDstStep, 4
     92         M_ALLOC4 pSrcStep, 4
     93 
     94         ;// Function header
                ;// Filters a 4x4 block with the 6-tap kernel (1,-5,20,20,-5,1), two rows per
                ;// loop pass. Each 32-bit working register holds two 16-bit lanes:
                ;// low lane = row 0 of the current row pair, high lane = row 1.
                ;// Samples are biased by 255 so every partial sum stays non-negative; the
                ;// accumulated bias per output is 16*255, removed (leaving +16 for rounding)
                ;// by the UQSUB16 with 0x0fe0 before the final shift right by 5.
                ;// NOTE(review): M_START/M_END are macros from armCOMM_s.h; the r6 argument
                ;// presumably makes them save/restore r4-r6 (and lr) - confirm.
     95         M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6
     96 
     97         MOV     Counter, #2                             ;// 2 passes x 2 rows = 4 rows
     98         M_STR   dstStep, pDstStep                       ;// save dstStep: r3 is reused as Temp1 / r0x0fe00fe0
     99         M_STR   srcStep, pSrcStep                       ;// save srcStep: r1 is reused as Temp2 / ValI
    100         LDR     r0x00ff00ff, =0x00ff00ff               ;// [0 255 0 255] 255 is offset to avoid negative results
    101 
    102 NextTwoRowsLoop
    103         LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
    104         LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
    105         LDR     ValH, [pSrc, srcStep]                   ;// Load  [h1 g1 f1 e1]
    106         LDR     ValE, [pSrc], #4                        ;// Load  [h0 g0 f0 e0]
    107         LDRB    Temp2, [pSrc, srcStep]                  ;// Load row 1 byte i1 only (overwrites srcStep in r1)
    108         LDRB    Temp1, [pSrc], #-8                      ;// Load row 0 byte i0 only, then rewind pSrc by 8
    109 
    110         PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
    111         PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
    112         UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
    113         UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
    114         PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
    115         PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
    116         PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
    117         UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]
    118 
    119         ;// Calculate Acc0
    120         ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
    121         UXTAB16 Temp1, ValC, ValD, ROR #8               ;// (c + 255) + d
    122         UXTAB16 Temp3, ValE, ValB, ROR #8               ;// (e + 255) + b
    123         RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(c + d) - (b + e) + 3*255
    124         UXTAB16 Acc0, ValA, ValF, ROR #8                ;// (a + 255) + f; overwrites ValA (r4)
    125         ADD     Temp1, Temp1, Temp1, LSL #2             ;// x5: 20*(c + d) - 5*(b + e) + 15*255
    126         ADD     Acc0, Acc0, Temp1                       ;// Acc0 = filter sum + 16*255
    127 
    128         ;// Calculate Acc1
    129         ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
    130         UXTAB16 Temp1, ValE, ValD, ROR #8               ;// (e + 255) + d
    131         UXTAB16 Temp3, ValC, ValF, ROR #8               ;// (c + 255) + f
    132         RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(d + e) - (c + f) + 3*255
    133         UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
    134         ADD     Temp1, Temp1, Temp1, LSL #2             ;// x5: 20*(d + e) - 5*(c + f) + 15*255
    135         UXTAB16 Acc1, ValG, ValB, ROR #8                ;// (g + 255) + b; overwrites ValB (r5)
    136         ADD     Acc1, Acc1, Temp1                       ;// Acc1 = filter sum + 16*255
    137 
    138         LDR     r0x0fe00fe0, =0x0fe00fe0                ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
    139         UXTAB16 Acc2, ValC, ValH, ROR #8                ;// (c + 255) + h; overwrites ValC (r6)
    140         ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]
    141         UQSUB16 Acc0, Acc0, r0x0fe00fe0                 ;// remove bias, keep +16 rounding; saturates at 0
    142         UQSUB16 Acc1, Acc1, r0x0fe00fe0                 ;// remove bias, keep +16 rounding; saturates at 0
    143         USAT16  Acc0, #13, Acc0                         ;// clamp each lane to 0..8191 so that >>5 is 0..255
    144         USAT16  Acc1, #13, Acc1                         ;// clamp each lane to 0..8191 so that >>5 is 0..255
    145 
    146         ;// Calculate Acc2
    147         ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
    148         UXTAB16 Temp1, ValG, ValD, ROR #8               ;// (g + 255) + d; overwrites r0x0fe00fe0 (r3)
    149         UXTAB16 Acc3, ValI, ValD, ROR #8                ;// (i + 255) + d; overwrites ValD (r7), last use of ValI
    150         UXTAB16 Temp2, ValE, ValF, ROR #8               ;// (e + 255) + f; overwrites ValI (r1)
    151         AND     Acc1, r0x00ff00ff, Acc1, LSR #5         ;// >>5 and keep one result byte per lane
    152         AND     Acc0, r0x00ff00ff, Acc0, LSR #5         ;// >>5 and keep one result byte per lane
    153         ORR     Acc0, Acc0, Acc1, LSL #8                ;// per lane: [out1 out0] of that row
    154         RSB     Temp5, Temp1, Temp2, LSL #2             ;// 4*(e + f) - (d + g) + 3*255; r5 free, Acc1 already merged
    155         UXTAB16 Temp2, ValG, ValF, ROR #8               ;// (g + 255) + f, used for Acc3 below
    156         ADD     Temp5, Temp5, Temp5, LSL #2             ;// x5: 20*(e + f) - 5*(d + g) + 15*255
    157         ADD     Acc2, Acc2, Temp5                       ;// Acc2 = filter sum + 16*255
    158 
    159         ;// Calculate Acc3
    160         ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
    161         UXTAB16 Temp5, ValE, ValH, ROR #8               ;// (e + 255) + h
    162         RSB     Temp5, Temp5, Temp2, LSL #2             ;// 4*(f + g) - (e + h) + 3*255
    163         LDR     r0x0fe00fe0, =0x0fe00fe0                ;// reload: r3 was overwritten as Temp1 above
    164         ADD     Temp5, Temp5, Temp5, LSL #2             ;// x5: 20*(f + g) - 5*(e + h) + 15*255
    165         ADD     Acc3, Acc3, Temp5                       ;// Acc3 = filter sum + 16*255
    166 
    167         UQSUB16 Acc3, Acc3, r0x0fe00fe0                 ;// remove bias, keep +16 rounding; saturates at 0
    168         UQSUB16 Acc2, Acc2, r0x0fe00fe0                 ;// remove bias, keep +16 rounding; saturates at 0
    169         USAT16  Acc3, #13, Acc3                         ;// clamp each lane to 0..8191 so that >>5 is 0..255
    170         USAT16  Acc2, #13, Acc2                         ;// clamp each lane to 0..8191 so that >>5 is 0..255
    171 
    172         M_LDR   dstStep, pDstStep                       ;// restore dstStep into r3 (constant no longer needed)
    173         AND     Acc3, r0x00ff00ff, Acc3, LSR #5         ;// >>5 and keep one result byte per lane
    174         AND     Acc2, r0x00ff00ff, Acc2, LSR #5         ;// >>5 and keep one result byte per lane
    175         ORR     Acc2, Acc2, Acc3, LSL #8                ;// per lane: [out3 out2] of that row
    176 
    177         SUBS    Counter, Counter, #1                    ;// sets the flags tested by BGT below
    178         M_LDR   srcStep, pSrcStep                       ;// restore srcStep into r1
    179         PKHBT   Acc1, Acc0, Acc2, LSL #16               ;// row 0: low lanes of Acc0 and Acc2
    180         M_STR   Acc1, [pDst], dstStep                   ;// Store result1 (row 0), then pDst += dstStep
    181         PKHTB   Acc2, Acc2, Acc0, ASR #16               ;// row 1: high lanes of Acc0 and Acc2
    182         M_STR   Acc2, [pDst], dstStep                   ;// Store result2 (row 1), then pDst += dstStep
    183         ADD     pSrc, pSrc, srcStep, LSL #1             ;// advance pSrc by two rows
    184 
    185         BGT     NextTwoRowsLoop                         ;// repeat while Counter > 0
    186 End
                ;// Both pointers advanced by four rows in total; rewind them so that
                ;// r0 and r2 leave with their entry values.
    187         SUB     pDst, pDst, dstStep, LSL #2             ;// pDst -= 4 * dstStep
    188         SUB     pSrc, pSrc, srcStep, LSL #2             ;// pSrc -= 4 * srcStep
    189 
    190         M_END
    191 
    192     ENDIF
    193 
    194     END
    195 
    196 
    197 
    198 
    199 
    200 
    201 
    202 
    203 
    204 
    205 
    206 
    207 
    208 
    209 
    210 
    211 
    212 
    213 
    214 
    215 
    216 
    217 
    218 
    219 
    220 
    221 
    222 
    223 
    224 
    225 
    226 
    227 
    228 
    229 
    230 
    231 
    232 
    233 
    234 
    235 
    236 
    237 
    238 
    239 
    240 
    241 
    242 
    243 
    244 
    245 
    246 
    247 
    248 
    249 
    250 
    251 
    252 
    253 
    254