Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   12290
     21 ;// Date:       Wednesday, April 9, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         M_VARIANTS ARM1136JS
     31 
     32         EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
     33         EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
     34 
     35 ;// Functions:
     36 ;//     armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
     37 ;//     armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
     38 ;//
     39 ;// Implements re-arrangement of data from temporary buffer to a buffer pointed by pBuf.
     40 ;// This will do the conversion of data from 16 bit to 8 bit and it also
     41 ;// removes the offset and checks for saturation.
     42 ;//
     43 ;// Registers used as input for this function
     44 ;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
     45 ;//
     46 ;// Registers preserved for top level function
     47 ;// r4,r5,r6,r8,r9,r14
     48 ;//
     49 ;// Registers modified by the function
     50 ;// r7,r10,r11,r12
     51 ;//
     52 ;// Output registers
     53 ;// r0 - pointer to the destination location
     54 ;// r1 - step size to this destination location
     55 
     56 
     57 DEBUG_ON    SETL {FALSE}
     58 
     59 MASK            EQU 0x80808080  ;// Mask is used to implement (a+b+1)/2
     60 
     61 ;// Declare input registers
     62 
     63 pSrc0           RN 0
     64 srcStep0        RN 1
     65 
     66 ;// Declare other intermediate registers
     67 Temp1           RN 4
     68 Temp2           RN 5
     69 Temp3           RN 10
     70 Temp4           RN 11
     71 pBuf            RN 7
     72 r0x0fe00fe0     RN 6
     73 r0x00ff00ff     RN 12
     74 Count           RN 14
     75 ValueA0         RN 10
     76 ValueA1         RN 11
     77 
     78     IF ARM1136JS
     79 
     80 
     81         ;// Function header
     82         M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6
     83 
     84         ;// Code start
     85         MOV         Count, #4
     86         LDR         r0x0fe00fe0, =0x0fe00fe0
     87         LDR         r0x00ff00ff, =0x00ff00ff
     88 LoopStart1
     89         LDR         Temp4, [pSrc0, #12]
     90         LDR         Temp3, [pSrc0, #8]
     91         LDR         Temp2, [pSrc0, #4]
     92         M_LDR       Temp1, [pSrc0], srcStep0
     93         UQSUB16     Temp4, Temp4, r0x0fe00fe0
     94         UQSUB16     Temp3, Temp3, r0x0fe00fe0
     95         UQSUB16     Temp2, Temp2, r0x0fe00fe0
     96         UQSUB16     Temp1, Temp1, r0x0fe00fe0
     97         USAT16      Temp4, #13, Temp4
     98         USAT16      Temp3, #13, Temp3
     99         USAT16      Temp2, #13, Temp2
    100         USAT16      Temp1, #13, Temp1
    101         AND         Temp4, r0x00ff00ff, Temp4, LSR #5
    102         AND         Temp3, r0x00ff00ff, Temp3, LSR #5
    103         AND         Temp2, r0x00ff00ff, Temp2, LSR #5
    104         AND         Temp1, r0x00ff00ff, Temp1, LSR #5
    105         ORR         ValueA1, Temp3, Temp4, LSL #8
    106         ORR         ValueA0, Temp1, Temp2, LSL #8
    107         SUBS        Count, Count, #1
    108         STRD        ValueA0, [pBuf], #8
    109         BGT         LoopStart1
    110 End1
    111         SUB        pSrc0, pBuf, #32
    112         MOV        srcStep0, #8
    113 
    114         M_END
    115 
    116 
    117         ;// Function header
    118         M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
    119 
    120         ;// Code start
    121         LDR         r0x0fe00fe0, =0x0fe00fe0
    122         LDR         r0x00ff00ff, =0x00ff00ff
    123         MOV         Count, #2
    124 
    125 LoopStart
    126         LDR         Temp4, [pSrc0, #12]
    127         LDR         Temp3, [pSrc0, #8]
    128         LDR         Temp2, [pSrc0, #4]
    129         M_LDR       Temp1, [pSrc0], srcStep0
    130 
    131         UQSUB16     Temp4, Temp4, r0x0fe00fe0
    132         UQSUB16     Temp3, Temp3, r0x0fe00fe0
    133         UQSUB16     Temp2, Temp2, r0x0fe00fe0
    134         UQSUB16     Temp1, Temp1, r0x0fe00fe0
    135 
    136         USAT16      Temp4, #13, Temp4
    137         USAT16      Temp3, #13, Temp3
    138         USAT16      Temp2, #13, Temp2
    139         USAT16      Temp1, #13, Temp1
    140 
    141         AND         Temp4, r0x00ff00ff, Temp4, LSR #5
    142         AND         Temp3, r0x00ff00ff, Temp3, LSR #5
    143         AND         Temp2, r0x00ff00ff, Temp2, LSR #5
    144         AND         Temp1, r0x00ff00ff, Temp1, LSR #5
    145         ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]
    146         ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]
    147 
    148         PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
    149 
    150         STR         Temp1, [pBuf], #8
    151         PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
    152         STR         Temp2, [pBuf], #-4
    153 
    154         LDR         Temp4, [pSrc0, #12]
    155         LDR         Temp3, [pSrc0, #8]
    156         LDR         Temp2, [pSrc0, #4]
    157         M_LDR       Temp1, [pSrc0], srcStep0
    158 
    159         UQSUB16     Temp4, Temp4, r0x0fe00fe0
    160         UQSUB16     Temp3, Temp3, r0x0fe00fe0
    161         UQSUB16     Temp2, Temp2, r0x0fe00fe0
    162         UQSUB16     Temp1, Temp1, r0x0fe00fe0
    163 
    164         USAT16      Temp4, #13, Temp4
    165         USAT16      Temp3, #13, Temp3
    166         USAT16      Temp2, #13, Temp2
    167         USAT16      Temp1, #13, Temp1
    168 
    169         AND         Temp4, r0x00ff00ff, Temp4, LSR #5
    170         AND         Temp3, r0x00ff00ff, Temp3, LSR #5
    171         AND         Temp2, r0x00ff00ff, Temp2, LSR #5
    172         AND         Temp1, r0x00ff00ff, Temp1, LSR #5
    173         ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]
    174         ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]
    175 
    176         PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
    177         SUBS        Count, Count, #1
    178         STR         Temp1, [pBuf], #8
    179         PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
    180         STR         Temp2, [pBuf], #4
    181 
    182         BGT         LoopStart
    183 End2
    184         SUB         pSrc0, pBuf, #32-8
    185         MOV         srcStep0, #4
    186 
    187         M_END
    188 
    189     ENDIF
    190 
    191     END
    192 
    193