Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27 
     28         INCLUDE omxtypes_s.h
     29         INCLUDE armCOMM_s.h
     30 
     31         M_VARIANTS ARM1136JS
     32 
     33     IF ARM1136JS
     34 
     35 ;// input registers
        ;// r0-r3 are named as the register inputs; iWidth, iHeight, dx and dy
        ;// are loaded into r4-r7 from the stack by the function body.
     36 
     37 pSrc                 RN 0
     38 iSrcStep             RN 1
     39 pDst                 RN 2
     40 iDstStep             RN 3
     41 iWidth               RN 4
     42 iHeight              RN 5
     43 dx                   RN 6
     44 dy                   RN 7
     45 
     46 
     47 ;// local variable registers
        ;//
        ;// Many of the names below are aliases for the SAME physical register.
        ;// Each alias is only valid for the short span of code where it is
        ;// used; any reordering of the function body must keep the lifetimes
        ;// of aliases that share a register from overlapping.
     48 temp                 RN 11    ;// scratch: holds iWidth - 1 while the loop counter is packed
     49 r0x20                RN 12    ;// constant 32 added before the >>6 (first output row)
     50 tmp0x20              RN 14    ;// constant 32 added before the >>6 (second output row)
     51 return               RN 0     ;// status result; shares r0 with pSrc
     52 dxPlusdy             RN 10    ;// declared but not referenced by the function below
     53 EightMinusdx         RN 8     ;// 8 - dx
     54 EightMinusdy         RN 9     ;// 8 - dy
     55 dxEightMinusdx       RN 8     ;// packed dx:(8 - dx), built in place over EightMinusdx
     56 BACoeff              RN 6     ;// packed coefficient pair, overwrites dx
     57 DCCoeff              RN 7     ;// packed coefficient pair, overwrites dy
     58 
        ;// End-of-row pointer adjustments (outer loops)
     59 iDstStepx2MinusWidth RN 8
     60 iSrcStepx2MinusWidth RN 9
     61 iSrcStep1            RN 10    ;// source stride recomputed as pSrc1 - pSrc
     62 
        ;// Additional row pointers
     63 pSrc1                RN 1     ;// second source row; overwrites iSrcStep
     64 pSrc2                RN 8     ;// third source row
     65 pDst1                RN 8     ;// second destination row (interpolating path)
     66 pDst2                RN 12    ;// second destination row (copy path)
     67 
        ;// Copy path (dx + dy == 0): the four pixels of a 2x2 tile
     68 pix00                RN 8
     69 pix01                RN 9
     70 pix10                RN 10
     71 pix11                RN 11
     72 
        ;// Copy path: two pixels packed into one halfword per row
     73 Out0100              RN 8
     74 Out1110              RN 10
     75 
        ;// Interpolating path: individual source pixels (xRC = row R, column C).
        ;// Row 2 reuses the registers of x01/x02/x12.
     76 x00                  RN 8
     77 x01                  RN 10
     78 x02                  RN 12
     79 x10                  RN 9
     80 x11                  RN 11
     81 x12                  RN 14
     82 x20                  RN 10
     83 x21                  RN 12
     84 x22                  RN 14
     85 
        ;// Interpolating path: horizontally adjacent pixels packed as
        ;// [31:16] = right pixel, [15:0] = left pixel (operands for SMLAD)
     86 x01x00               RN 8
     87 x02x01               RN 10
     88 x11x10               RN 9
     89 x12x11               RN 11
     90 x21x20               RN 10
     91 x22x21               RN 12
     92 
        ;// Interpolating path: filtered output pixels (OutRowRC = row R, column C)
     93 OutRow00             RN 12
     94 OutRow01             RN 14
     95 OutRow10             RN 10
     96 OutRow11             RN 12
     97 
        ;// Two output pixels packed into one halfword for STRH
     98 OutRow0100           RN 12
     99 OutRow1110           RN 12
    100 
    101 ;//-----------------------------------------------------------------------------------------------
    102 ;// armVCM4P10_Interpolate_Chroma_asm starts
    103 ;//-----------------------------------------------------------------------------------------------
    104 
    105         ;// Write function header
                ;//
                ;// armVCM4P10_Interpolate_Chroma
                ;//
                ;// Fills an iWidth x iHeight block of 8-bit pixels at pDst from
                ;// the 8-bit source pixels at pSrc. When dx + dy != 0 every
                ;// output pixel is a weighted sum of a 2x2 source neighbourhood:
                ;//
                ;//   out[y][x] = (A*s[y][x]   + B*s[y][x+1] +
                ;//                C*s[y+1][x] + D*s[y+1][x+1] + 32) >> 6
                ;//
                ;//   A = (8-dx)*(8-dy)   B = dx*(8-dy)
                ;//   C = (8-dx)*dy       D = dx*dy
                ;//
                ;// When dx + dy == 0 the pixels are copied unchanged.
                ;//
                ;// Arguments:
                ;//   r0    pSrc      first source row
                ;//   r1    iSrcStep  byte offset from one source row to the next
                ;//   r2    pDst      first destination row
                ;//   r3    iDstStep  byte offset from one destination row to the next
                ;//   stack iWidth, iHeight, dx, dy (4 bytes each)
                ;//
                ;// Returns:
                ;//   r0 = OMX_Sts_NoErr
                ;//
                ;// Both paths produce a 2x2 output tile per inner-loop pass and
                ;// two output rows per outer-loop pass.
                ;//
                ;// NOTE(review): the loop counters step by 2, so iWidth and iHeight
                ;//   are presumably required to be even and > 0 - confirm with callers.
                ;// NOTE(review): the packed coefficients and the zero test on dx + dy
                ;//   assume 0 <= dx, dy <= 7 - confirm with callers.
                ;// NOTE(review): output is written with STRH; presumably pDst and
                ;//   iDstStep keep those stores halfword aligned - confirm.
                ;// NOTE(review): r14 is used as a scratch register below, so this
                ;//   relies on M_START / M_EXIT / M_END (armCOMM_s.h, not visible
                ;//   here) preserving the return address - confirm.
    106         M_START armVCM4P10_Interpolate_Chroma, r11
    107 
    108         ;// Define stack arguments
    109         M_ARG   Width,      4
    110         M_ARG   Height,     4
    111         M_ARG   Dx,         4
    112         M_ARG   Dy,         4
    113 
    114         ;// Load argument from the stack
    115         ;// M_STALL ARM1136JS=4
    116 
    117         M_LDR   iWidth,  Width
    118         M_LDR   iHeight, Height
    119         M_LDR   dx,      Dx
    120         M_LDR   dy,      Dy
    121 
    122         ;// EightMinusdx = 8 - dx
    123         ;// EightMinusdy = 8 - dy
    124 
    125         ;// ACoeff = EightMinusdx * EightMinusdy
    126         ;// BCoeff = dx * EightMinusdy
    127         ;// CCoeff = EightMinusdx * dy
    128         ;// DCoeff = dx * dy
    129 
    130         ADD     pSrc1, pSrc, iSrcStep                       ;// pSrc1 -> second source row (overwrites iSrcStep, same register)
    131         SUB     temp, iWidth, #1                            ;// temp = iWidth - 1, seed for the inner loop counter
    132         RSB     EightMinusdx, dx, #8                        ;// 8 - dx
    133         RSB     EightMinusdy, dy, #8                        ;// 8 - dy
    134         CMN     dx,dy                                       ;// Z set when dx + dy == 0; flags stay live until the BEQ below
    135         ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16   ;// [31:16] = dx, [15:0] = 8 - dx
    136         ORR     iWidth, iWidth, temp, LSL #16               ;// [31:16] = iWidth - 1 (counter), [15:0] = iWidth
    137 
    138         ;// Packed Coeffs.
                ;// Multiplying the packed dx:(8-dx) word by a small scalar yields both
                ;// products at once, one per halfword. Neither MUL has an S suffix,
                ;// so the flags set by CMN are still intact.
    139 
    140         MUL     BACoeff, dxEightMinusdx, EightMinusdy       ;// [31:16] = BCoeff, [15:0] = ACoeff (overwrites dx)
    141         MUL     DCCoeff, dxEightMinusdx, dy                 ;// [31:16] = DCoeff, [15:0] = CCoeff (overwrites dy)
    142 
    143 
    144         ;// Checking either of dx and dy being non-zero
                ;// (uses the flags from CMN dx,dy above: dx + dy == 0 takes the copy path)
    145 
    146         BEQ     MVIsZero
    147 
    148 ;// Pixel layout:
    149 ;//
    150 ;//   x00 x01 x02
    151 ;//   x10 x11 x12
    152 ;//   x20 x21 x22
        ;//
        ;// Each inner-loop pass reads this 3x3 neighbourhood and writes the
        ;// 2x2 output tile whose top-left pixel corresponds to x00.
    153 
    154 ;// If fractional mv is not (0, 0)
    155 
    156 OuterLoopMVIsNotZero
    157 
    158 InnerLoopMVIsNotZero
    159 
                        ;// Load rows 0 and 1; the last load of each row uses
                        ;// pre-indexed writeback, advancing pSrc and pSrc1 by 2.
    160                 LDRB    x00, [pSrc,  #+0]
    161                 LDRB    x10, [pSrc1, #+0]
    162                 LDRB    x01, [pSrc,  #+1]
    163                 LDRB    x11, [pSrc1, #+1]
    164                 LDRB    x02, [pSrc,  #+2]!
    165                 LDRB    x12, [pSrc1, #+2]!
    166 
                        ;// Pack horizontally adjacent pixels as [31:16] = right, [15:0] = left
    167                 ORR     x01x00, x00, x01, LSL #16
    168                 ;// M_STALL ARM1136JS=1
    169                 ORR     x02x01, x01, x02, LSL #16
    170                 MOV     r0x20,  #32                         ;// constant 32; reuses x02's register, x02 is already packed
    171                 ORR     x11x10, x10, x11, LSL #16
    172                 ORR     x12x11, x11, x12, LSL #16
    173 
                        ;// Row 0 contribution: ACoeff*left + BCoeff*right + 32
    174                 SMLAD   x01x00, x01x00, BACoeff, r0x20
    175                 SMLAD   x02x01, x02x01, BACoeff, r0x20
    176 
    177                 ;// iWidth packed with MSB (top 16 bits)
    178                 ;// as inner loop counter value i.e
    179                 ;// (iWidth -1) and LSB (lower 16 bits)
    180                 ;// as original width
                        ;//
                        ;// Subtracting 1<<17 lowers the counter half by 2 (two pixels
                        ;// per pass). The flags set here are consumed by the BGT at the
                        ;// end of the loop body; nothing in between updates them.
    181 
    182                 SUBS    iWidth, iWidth, #1<<17
    183 
                        ;// Add the row 1 contribution: CCoeff*left + DCoeff*right
    184                 SMLAD   OutRow00, x11x10, DCCoeff, x01x00
    185                 SMLAD   OutRow01, x12x11, DCCoeff, x02x01
    186 
                        ;// pSrc2 = 2*pSrc1 - pSrc = pSrc1 + (pSrc1 - pSrc), i.e. one row
                        ;// below pSrc1. Both pointers were already advanced by 2, hence
                        ;// the -2/-1/0 offsets on the row 2 loads below.
    187                 RSB     pSrc2, pSrc, pSrc1, LSL #1
    188 
    189                 MOV     OutRow00, OutRow00, LSR #6
    190                 MOV     OutRow01, OutRow01, LSR #6
    191 
    192                 LDRB    x20,[pSrc2, #-2]
    193 
                        ;// Pack the two first-row output pixels and store them as one halfword
    194                 ORR     OutRow0100, OutRow00, OutRow01, LSL #8
    195                 STRH    OutRow0100, [pDst], #2
    196 
    197                 LDRB    x21,[pSrc2, #-1]
    198                 LDRB    x22,[pSrc2, #+0]
    199 
                        ;// pDst was post-incremented by the store above, so the second
                        ;// output row is written at [pDst1, #-2]. pDst1 reuses pSrc2's
                        ;// register; all row 2 loads have been issued by this point.
    200                 ADD     pDst1, pDst, iDstStep
    201 
    202                 ;// M_STALL ARM1136JS=1
    203 
    204                 ORR     x21x20, x20, x21, LSL #16
    205                 ORR     x22x21, x21, x22, LSL #16
    206 
    207                 MOV     tmp0x20, #32                        ;// constant 32 again; r0x20's register now holds x22x21
    208 
    209                 ;// Reusing the packed data x11x10 and x12x11
                        ;// (row 1 is the upper row for the second output row, so it is
                        ;// weighted with BACoeff here and row 2 with DCCoeff)
    210 
    211                 SMLAD   x11x10,  x11x10,  BACoeff, tmp0x20
    212                 SMLAD   x12x11,  x12x11,  BACoeff, tmp0x20
    213                 SMLAD   OutRow10, x21x20, DCCoeff, x11x10
    214                 SMLAD   OutRow11, x22x21, DCCoeff, x12x11
    215 
    216                 MOV     OutRow10, OutRow10, LSR #6
    217                 MOV     OutRow11, OutRow11, LSR #6
    218 
    219                 ;// M_STALL ARM1136JS=1
    220 
    221                 ORR     OutRow1110, OutRow10, OutRow11, LSL #8
    222 
    223                 STRH    OutRow1110, [pDst1, #-2]
    224 
    225                 BGT     InnerLoopMVIsNotZero                ;// flags from SUBS iWidth above
    226 
                        ;// End of a row pair: restore iWidth, step all pointers to the
                        ;// start of the next two rows and re-arm the inner loop counter.
                        ;// The flags from SUBS iHeight are consumed by the BGT below;
                        ;// none of the instructions in between update them.
                        ;// NOTE(review): adding 1<<16 leaves the plain width in iWidth
                        ;//   only if the counter half ended at -1, which holds for even
                        ;//   iWidth - confirm widths are always even.
    227                 SUBS    iHeight, iHeight, #2
    228                 ADD     iWidth, iWidth, #1<<16
    229                 RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1     ;// 2*iDstStep - iWidth
    230                 SUB     iSrcStep1, pSrc1, pSrc                             ;// source stride (iSrcStep's register was overwritten)
    231                 SUB     temp, iWidth, #1
    232                 RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1    ;// 2*stride - iWidth
    233                 ADD     pDst, pDst, iDstStepx2MinusWidth
    234                 ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
    235                 ADD     pSrc, pSrc, iSrcStepx2MinusWidth
    236                 ORR     iWidth, iWidth, temp, LSL #16                      ;// repack counter: [31:16] = iWidth - 1
    237                 BGT     OuterLoopMVIsNotZero
    238                 MOV     return,  #OMX_Sts_NoErr
    239                 M_EXIT
    240 
    241 ;// If fractional mv is (0, 0)
        ;// Plain copy: each inner-loop pass copies a 2x2 tile, one halfword per row.
    242 
    243 MVIsZero
    244                 ;// M_STALL ARM1136JS=4
    245 OuterLoopMVIsZero
    246 
    247 InnerLoopMVIsZero
    248 
                        ;// Two pixels from each of the two source rows (post-increment by 1)
    249                 LDRB    pix00, [pSrc],  #+1
    250                 LDRB    pix01, [pSrc],  #+1
    251                 LDRB    pix10, [pSrc1], #+1
    252                 LDRB    pix11, [pSrc1], #+1
    253 
    254                 ADD     pDst2,  pDst, iDstStep              ;// second destination row
    255                 SUBS    iWidth, iWidth, #1<<17              ;// counter half -= 2; flags consumed by BGT below
    256 
    257                 ORR     Out0100, pix00, pix01, LSL #8
    258                 ORR     Out1110, pix10, pix11, LSL #8
    259 
    260                 STRH    Out0100, [pDst],  #2
    261                 STRH    Out1110, [pDst2], #2
    262 
    263                 BGT     InnerLoopMVIsZero
    264 
                        ;// End of a row pair: same pointer and counter bookkeeping as
                        ;// at the end of the interpolating outer loop.
    265                 SUBS    iHeight, iHeight, #2
    266                 ADD     iWidth, iWidth, #1<<16
    267                 RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1     ;// 2*iDstStep - iWidth
    268                 SUB     iSrcStep1, pSrc1, pSrc                             ;// source stride
    269                 SUB     temp, iWidth, #1
    270                 RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1    ;// 2*stride - iWidth
    271                 ADD     pDst, pDst, iDstStepx2MinusWidth
    272                 ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
    273                 ADD     pSrc, pSrc, iSrcStepx2MinusWidth
    274                 ORR     iWidth, iWidth, temp, LSL #16                      ;// repack counter: [31:16] = iWidth - 1
    275                 BGT     OuterLoopMVIsZero
    276                 MOV     return,  #OMX_Sts_NoErr
    277                 M_END
    278 
    279         ENDIF ;// ARM1136JS
    280 
    281 
    282         END
    283 
    284 ;//-----------------------------------------------------------------------------------------------
    285 ;// armVCM4P10_Interpolate_Chroma_asm ends
    286 ;//-----------------------------------------------------------------------------------------------
    287 
    288