Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 
     14         INCLUDE omxtypes_s.h
     15         INCLUDE armCOMM_s.h
     16 
     17         M_VARIANTS ARM1136JS
     18 
     19     IF ARM1136JS
     20 
     21 ;// input registers
        ;// pSrc, iSrcStep, pDst and iDstStep are used as they arrive in r0-r3.
        ;// iWidth, iHeight, dx and dy are filled by the M_LDR loads of the
        ;// Width/Height/Dx/Dy stack arguments at the top of the function.
     22
     23 pSrc                 RN 0    ;// source pointer, upper row of the current row pair
     24 iSrcStep             RN 1    ;// source stride; read once to form pSrc1, then r1 is reused as pSrc1
     25 pDst                 RN 2    ;// destination pointer, upper row of the current row pair
     26 iDstStep             RN 3    ;// destination stride
     27 iWidth               RN 4    ;// block width; inside the loops the top 16 bits also carry the column counter
     28 iHeight              RN 5    ;// rows still to produce (decremented by 2 per row pair)
     29 dx                   RN 6    ;// horizontal fractional offset; r6 is later overwritten by BACoeff
     30 dy                   RN 7    ;// vertical fractional offset; r7 is later overwritten by DCCoeff
     31
     32
     33 ;// local variable registers
        ;// Many of the names below share one physical register (r8 alone has nine
        ;// aliases). Which name is "live" at a given point depends entirely on the
        ;// instruction order in the function body, so an alias must only be used
        ;// after the previous occupant of the same register has been consumed.
     34 temp                 RN 11   ;// scratch for (iWidth - 1)
     35 r0x20                RN 12   ;// rounding constant 32 (0x20) for the first output row
     36 tmp0x20              RN 14   ;// rounding constant 32 (0x20) for the second output row
     37 return               RN 0    ;// status value written just before exit (shares r0 with pSrc)
     38 dxPlusdy             RN 10   ;// not referenced anywhere in the visible code
     39 EightMinusdx         RN 8    ;// 8 - dx
     40 EightMinusdy         RN 9    ;// 8 - dy
     41 dxEightMinusdx       RN 8    ;// (8 - dx) + (dx << 16)
     42 BACoeff              RN 6    ;// dxEightMinusdx * (8 - dy): BCoeff in the high half, ACoeff in the low half
     43 DCCoeff              RN 7    ;// dxEightMinusdx * dy:       DCoeff in the high half, CCoeff in the low half
     44
        ;// temporaries used when stepping to the next row pair
     45 iDstStepx2MinusWidth RN 8    ;// 2 * iDstStep - iWidth
     46 iSrcStepx2MinusWidth RN 9    ;// 2 * iSrcStep1 - iWidth
     47 iSrcStep1            RN 10   ;// pSrc1 - pSrc, i.e. the source stride recovered from the two row pointers
     48
        ;// additional row pointers
     49 pSrc1                RN 1    ;// source pointer, second row (pSrc + iSrcStep)
     50 pSrc2                RN 8    ;// source pointer, third row (used on the non-zero offset path only)
     51 pDst1                RN 8    ;// destination pointer, second row (non-zero offset path)
     52 pDst2                RN 12   ;// destination pointer, second row (zero offset path)
     53
        ;// zero offset path: the four bytes of one 2x2 block
     54 pix00                RN 8
     55 pix01                RN 9
     56 pix10                RN 10
     57 pix11                RN 11
     58
        ;// zero offset path: two bytes of one row packed into a halfword
     59 Out0100              RN 8
     60 Out1110              RN 10
     61
        ;// non-zero offset path: 3x3 source neighbourhood, one byte per register
        ;// (xRC = row R, column C; see the pixel layout comment in the function)
     62 x00                  RN 8
     63 x01                  RN 10
     64 x02                  RN 12
     65 x10                  RN 9
     66 x11                  RN 11
     67 x12                  RN 14
     68 x20                  RN 10
     69 x21                  RN 12
     70 x22                  RN 14
     71
        ;// non-zero offset path: horizontally adjacent pixels packed as two
        ;// halfwords (right pixel in the high half) to feed SMLAD
     72 x01x00               RN 8
     73 x02x01               RN 10
     74 x11x10               RN 9
     75 x12x11               RN 11
     76 x21x20               RN 10
     77 x22x21               RN 12
     78
        ;// non-zero offset path: interpolated output pixels (OutRowRC = row R, column C)
     79 OutRow00             RN 12
     80 OutRow01             RN 14
     81 OutRow10             RN 10
     82 OutRow11             RN 12
     83
        ;// non-zero offset path: two output pixels of one row packed into a halfword
     84 OutRow0100           RN 12
     85 OutRow1110           RN 12
     86 
     87 ;//-----------------------------------------------------------------------------------------------
     88 ;// armVCM4P10_Interpolate_Chroma_asm starts
     89 ;//-----------------------------------------------------------------------------------------------
     90 
     91         ;// Write function header
                ;//
                ;// armVCM4P10_Interpolate_Chroma
                ;//
                ;// Fills an iWidth x iHeight byte block at pDst from the bytes at pSrc,
                ;// producing one 2x2 output block per inner-loop iteration.
                ;//
                ;//   dx + dy != 0:  out[y][x] = (A*s[y][x]   + B*s[y][x+1] +
                ;//                               C*s[y+1][x] + D*s[y+1][x+1] + 32) >> 6
                ;//                  with A = (8-dx)*(8-dy), B = dx*(8-dy),
                ;//                       C = (8-dx)*dy,     D = dx*dy.
                ;//                  This path reads one source column and one source row
                ;//                  beyond the iWidth x iHeight block.
                ;//   dx + dy == 0:  source bytes are copied unchanged.
                ;//
                ;// In:   r0 = pSrc, r1 = iSrcStep, r2 = pDst, r3 = iDstStep;
                ;//       Width, Height, Dx, Dy are read from the stack via M_ARG/M_LDR.
                ;// Out:  r0 = OMX_Sts_NoErr on both exit paths.
                ;// Work: r0-r3 are modified; r4-r12 and r14 are used as working registers.
                ;//
                ;// NOTE(review): M_START, M_ARG, M_LDR, M_EXIT and M_END are macros that are
                ;// not defined in this file (presumably armCOMM_s.h). This code overwrites
                ;// r4-r12 and r14, so it presumably relies on "M_START ..., r11" preserving
                ;// r4-r11 and lr for the caller - TODO confirm against the macro definition.
                ;// NOTE(review): the packed coefficients only stay in separate halfwords
                ;// when each product fits in 16 bits; this assumes 0 <= dx, dy <= 7 - TODO
                ;// confirm with the caller.
                ;// NOTE(review): both loops step by 2 and are bottom-tested, so at least one
                ;// 2x2 block is always written, and the column-counter arithmetic only
                ;// restores the plain width when iWidth is even; callers presumably always
                ;// pass even, non-zero iWidth/iHeight - TODO confirm.
                ;// NOTE(review): output pairs are written with STRH with the left pixel in
                ;// bits 7:0; this assumes little-endian data and a halfword-aligned pDst -
                ;// TODO confirm.
     92         M_START armVCM4P10_Interpolate_Chroma, r11
     93
     94         ;// Define stack arguments
     95         M_ARG   Width,      4
     96         M_ARG   Height,     4
     97         M_ARG   Dx,         4
     98         M_ARG   Dy,         4
     99
    100         ;// Load argument from the stack
    101         ;// M_STALL ARM1136JS=4
    102
    103         M_LDR   iWidth,  Width
    104         M_LDR   iHeight, Height
    105         M_LDR   dx,      Dx
    106         M_LDR   dy,      Dy
    107
    108         ;// EightMinusdx = 8 - dx
    109         ;// EightMinusdy = 8 - dy
    110
    111         ;// ACoeff = EightMinusdx * EightMinusdy
    112         ;// BCoeff = dx * EightMinusdy
    113         ;// CCoeff = EightMinusdx * dy
    114         ;// DCoeff = dx * dy
                ;// ACoeff + BCoeff + CCoeff + DCoeff = 64, which is why the weighted sum is
                ;// later rounded with +32 and shifted right by 6.
    115
    116         ADD     pSrc1, pSrc, iSrcStep                     ;// second source row; r1 no longer holds iSrcStep
    117         SUB     temp, iWidth, #1                          ;// initial column counter = iWidth - 1
    118         RSB     EightMinusdx, dx, #8
    119         RSB     EightMinusdy, dy, #8
    120         CMN     dx,dy                                     ;// Z set iff dx + dy == 0; consumed by the BEQ below
    121         ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16 ;// high half = dx, low half = 8 - dx
    122         ORR     iWidth, iWidth, temp, LSL #16             ;// high half = iWidth - 1 (counter), low half = iWidth
    123
    124         ;// Packed Coeffs.
                ;// One MUL scales both halfwords at once, giving two coefficients per register.
    125
    126         MUL     BACoeff, dxEightMinusdx, EightMinusdy     ;// high half = BCoeff, low half = ACoeff (overwrites dx)
    127         MUL     DCCoeff, dxEightMinusdx, dy               ;// high half = DCoeff, low half = CCoeff (overwrites dy)
    128
    129
    130         ;// Checking either of dx and dy being non-zero
                ;// The flags still come from the CMN above: none of the ADD/ORR/MUL
                ;// instructions in between has an S suffix.
    131
    132         BEQ     MVIsZero
    133
    134 ;// Pixel layout:
    135 ;//
    136 ;//   x00 x01 x02
    137 ;//   x10 x11 x12
    138 ;//   x20 x21 x22
        ;//
        ;// Each inner-loop iteration reads this 3x3 neighbourhood and writes the 2x2
        ;// block whose top-left output pixel corresponds to x00.
    139
    140 ;// If fractional mv is not (0, 0)
    141
    142 OuterLoopMVIsNotZero
    143
    144 InnerLoopMVIsNotZero
    145
                        ;// Load the upper two rows of the neighbourhood.
    146                 LDRB    x00, [pSrc,  #+0]
    147                 LDRB    x10, [pSrc1, #+0]
    148                 LDRB    x01, [pSrc,  #+1]
    149                 LDRB    x11, [pSrc1, #+1]
    150                 LDRB    x02, [pSrc,  #+2]!                ;// pre-indexed with writeback: pSrc  += 2
    151                 LDRB    x12, [pSrc1, #+2]!                ;// pre-indexed with writeback: pSrc1 += 2
    152
    153                 ORR     x01x00, x00, x01, LSL #16         ;// high half = x01, low half = x00
    154                 ;// M_STALL ARM1136JS=1
    155                 ORR     x02x01, x01, x02, LSL #16         ;// high half = x02, low half = x01 (r10 held x01)
    156                 MOV     r0x20,  #32                       ;// rounding term; r12 held x02 until the ORR above
    157                 ORR     x11x10, x10, x11, LSL #16         ;// high half = x11, low half = x10
    158                 ORR     x12x11, x11, x12, LSL #16         ;// high half = x12, low half = x11
    159
    160                 SMLAD   x01x00, x01x00, BACoeff, r0x20    ;// ACoeff*x00 + BCoeff*x01 + 32
    161                 SMLAD   x02x01, x02x01, BACoeff, r0x20    ;// ACoeff*x01 + BCoeff*x02 + 32
    162
    163                 ;// iWidth packed with MSB (top 16 bits)
    164                 ;// as inner loop counter value i.e
    165                 ;// (iWidth -1) and LSB (lower 16 bits)
    166                 ;// as original width
                        ;// #1<<17 equals 2<<16, so the counter half drops by 2 (two columns per
                        ;// iteration). The flags set here are consumed by the BGT at the end of
                        ;// the loop; no instruction in between updates them.
    167
    168                 SUBS    iWidth, iWidth, #1<<17
    169
    170                 SMLAD   OutRow00, x11x10, DCCoeff, x01x00 ;// + CCoeff*x10 + DCoeff*x11
    171                 SMLAD   OutRow01, x12x11, DCCoeff, x02x01 ;// + CCoeff*x11 + DCoeff*x12
    172
    173                 RSB     pSrc2, pSrc, pSrc1, LSL #1        ;// 2*pSrc1 - pSrc = pSrc1 + (pSrc1 - pSrc): third source row
    174
    175                 MOV     OutRow00, OutRow00, LSR #6        ;// (sum + 32) >> 6
    176                 MOV     OutRow01, OutRow01, LSR #6
    177
    178                 LDRB    x20,[pSrc2, #-2]                  ;// row pointers were already advanced by 2, hence -2/-1/0
    179
    180                 ORR     OutRow0100, OutRow00, OutRow01, LSL #8 ;// left pixel in bits 7:0, right pixel in bits 15:8
    181                 STRH    OutRow0100, [pDst], #2            ;// write two pixels of the first output row; pDst += 2
    182
    183                 LDRB    x21,[pSrc2, #-1]
    184                 LDRB    x22,[pSrc2, #+0]
    185
    186                 ADD     pDst1, pDst, iDstStep             ;// second output row, already 2 past this block (r8 held pSrc2)
    187
    188                 ;// M_STALL ARM1136JS=1
    189
    190                 ORR     x21x20, x20, x21, LSL #16         ;// high half = x21, low half = x20
    191                 ORR     x22x21, x21, x22, LSL #16         ;// high half = x22, low half = x21
    192
    193                 MOV     tmp0x20, #32                      ;// rounding term; r14 held x22 until the ORR above
    194
    195                 ;// Reusing the packed data x11x10 and x12x11
                        ;// For the second output row the middle source row takes the A/B
                        ;// coefficients and the bottom source row takes the C/D coefficients.
    196
    197                 SMLAD   x11x10,  x11x10,  BACoeff, tmp0x20 ;// ACoeff*x10 + BCoeff*x11 + 32
    198                 SMLAD   x12x11,  x12x11,  BACoeff, tmp0x20 ;// ACoeff*x11 + BCoeff*x12 + 32
    199                 SMLAD   OutRow10, x21x20, DCCoeff, x11x10  ;// + CCoeff*x20 + DCoeff*x21
    200                 SMLAD   OutRow11, x22x21, DCCoeff, x12x11  ;// + CCoeff*x21 + DCoeff*x22
    201
    202                 MOV     OutRow10, OutRow10, LSR #6        ;// (sum + 32) >> 6
    203                 MOV     OutRow11, OutRow11, LSR #6
    204
    205                 ;// M_STALL ARM1136JS=1
    206
    207                 ORR     OutRow1110, OutRow10, OutRow11, LSL #8 ;// left pixel in bits 7:0, right pixel in bits 15:8
    208
    209                 STRH    OutRow1110, [pDst1, #-2]          ;// -2 undoes the post-increment already applied to pDst
    210
    211                 BGT     InnerLoopMVIsNotZero              ;// flags from the SUBS on iWidth above
    212
                        ;// Row pair finished: step all pointers to the start of the next row pair
                        ;// and rebuild the packed column counter.
    213                 SUBS    iHeight, iHeight, #2              ;// flags are consumed by the BGT at the end of this block
    214                 ADD     iWidth, iWidth, #1<<16            ;// counter half is -1 after an even width; +1 leaves plain iWidth
    215                 RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// 2*iDstStep - iWidth
    216                 SUB     iSrcStep1, pSrc1, pSrc            ;// recover the source stride (r1 was reused for pSrc1)
    217                 SUB     temp, iWidth, #1                  ;// next column counter = iWidth - 1
    218                 RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 ;// 2*stride - iWidth
    219                 ADD     pDst, pDst, iDstStepx2MinusWidth  ;// pointers advanced by iWidth in the loop; move down two rows
    220                 ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
    221                 ADD     pSrc, pSrc, iSrcStepx2MinusWidth
    222                 ORR     iWidth, iWidth, temp, LSL #16     ;// repack: high half = iWidth - 1, low half = iWidth
    223                 BGT     OuterLoopMVIsNotZero              ;// flags from the SUBS on iHeight above
    224                 MOV     return,  #OMX_Sts_NoErr
    225                 M_EXIT
    226
    227 ;// If fractional mv is (0, 0)
        ;// Plain 2x2 block copy; BACoeff and DCCoeff are not used on this path.
    228
    229 MVIsZero
    230                 ;// M_STALL ARM1136JS=4
    231 OuterLoopMVIsZero
    232
    233 InnerLoopMVIsZero
    234
    235                 LDRB    pix00, [pSrc],  #+1               ;// post-indexed: each load advances its row pointer by 1
    236                 LDRB    pix01, [pSrc],  #+1
    237                 LDRB    pix10, [pSrc1], #+1
    238                 LDRB    pix11, [pSrc1], #+1
    239
    240                 ADD     pDst2,  pDst, iDstStep            ;// second output row, computed before pDst is advanced
    241                 SUBS    iWidth, iWidth, #1<<17            ;// counter half -= 2; flags consumed by the BGT below
    242
    243                 ORR     Out0100, pix00, pix01, LSL #8     ;// left pixel in bits 7:0, right pixel in bits 15:8
    244                 ORR     Out1110, pix10, pix11, LSL #8
    245
    246                 STRH    Out0100, [pDst],  #2              ;// write two pixels of the first output row; pDst += 2
    247                 STRH    Out1110, [pDst2], #2              ;// pDst2 is recomputed every iteration, so its writeback is unused
    248
    249                 BGT     InnerLoopMVIsZero                 ;// flags from the SUBS on iWidth above
    250
                        ;// Row pair finished: same pointer/counter update as on the non-zero path.
    251                 SUBS    iHeight, iHeight, #2              ;// flags are consumed by the BGT at the end of this block
    252                 ADD     iWidth, iWidth, #1<<16            ;// counter half is -1 after an even width; +1 leaves plain iWidth
    253                 RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// 2*iDstStep - iWidth
    254                 SUB     iSrcStep1, pSrc1, pSrc            ;// recover the source stride (r1 was reused for pSrc1)
    255                 SUB     temp, iWidth, #1                  ;// next column counter = iWidth - 1
    256                 RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 ;// 2*stride - iWidth
    257                 ADD     pDst, pDst, iDstStepx2MinusWidth  ;// pointers advanced by iWidth in the loop; move down two rows
    258                 ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
    259                 ADD     pSrc, pSrc, iSrcStepx2MinusWidth
    260                 ORR     iWidth, iWidth, temp, LSL #16     ;// repack: high half = iWidth - 1, low half = iWidth
    261                 BGT     OuterLoopMVIsZero                 ;// flags from the SUBS on iHeight above
    262                 MOV     return,  #OMX_Sts_NoErr
    263                 M_END
    264 
    265         ENDIF ;// ARM1136JS
    266 
    267 
    268         END
    269 
    270 ;//-----------------------------------------------------------------------------------------------
    271 ;// armVCM4P10_Interpolate_Chroma_asm ends
    272 ;//-----------------------------------------------------------------------------------------------
    273 
    274