Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27 
     28         INCLUDE omxtypes_s.h
     29         INCLUDE armCOMM_s.h
     30 
     31         M_VARIANTS CortexA8
     32 
     33 
     34     IF CortexA8                                     ;// code below is assembled only when CortexA8 is true (closed by ENDIF at the end of the file)
     35 
        ;// Width dispatch tables.
        ;// The function finishes its setup with
        ;//     LDR pc, [pTable, iWidth, LSL #1]
        ;// so the byte offset into a table is iWidth*2. With 4-byte DCD entries that is
        ;// entry number iWidth/2:  width 2 -> entry 1, width 4 -> entry 2, width 8 -> entry 4.
        ;// Entries 0 and 3 duplicate their neighbours and only pad the table to fit that indexing.
        ;//
        ;// Table used when CMN dx,dy leaves Z clear (dx + dy != 0): interpolating loops.
     36     M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero
     37 
     38     DCD   WidthIs2MVIsNotZero, WidthIs2MVIsNotZero      ;// entries 0 (padding), 1 (iWidth == 2)
     39     DCD   WidthIs4MVIsNotZero, WidthIs4MVIsNotZero      ;// entries 2 (iWidth == 4), 3 (padding)
     40     DCD   WidthIs8MVIsNotZero                           ;// entry 4 (iWidth == 8)
     41 
        ;// Table used when CMN dx,dy sets Z (dx + dy == 0): plain copy loops, same layout as above.
     42     M_TABLE armVCM4P10_WidthBranchTableMVIsZero
     43 
     44     DCD   WidthIs2MVIsZero, WidthIs2MVIsZero            ;// entries 0 (padding), 1 (iWidth == 2)
     45     DCD   WidthIs4MVIsZero, WidthIs4MVIsZero            ;// entries 2 (iWidth == 4), 3 (padding)
     46     DCD   WidthIs8MVIsZero                              ;// entry 4 (iWidth == 8)
     47 
     48 
     49 ;// input registers (r0-r3 are used as received on entry; r4-r7 are loaded from the stack arguments with M_LDRD)
     50 
     51 pSrc                 RN 0      ;// source pointer; advanced by the post-incrementing VLD1 loads
     52 iSrcStep             RN 1      ;// byte distance between source rows
     53 pDst                 RN 2      ;// destination pointer; advanced by iDstStep after every row store
     54 iDstStep             RN 3      ;// byte distance between destination rows
     55 iWidth               RN 4      ;// block width; only used as the branch-table index
     56 iHeight              RN 5      ;// rows still to produce; counted down inside the loops
     57 dx                   RN 6      ;// horizontal fractional offset
     58 dy                   RN 7      ;// vertical fractional offset
     59 
     60 ;// local variable registers
     61 pc                   RN 15
     62 return               RN 0      ;// same register as pSrc; written only immediately before exit
     63 EightMinusdx         RN 8      ;// 8 - dx
     64 EightMinusdy         RN 9      ;// 8 - dy
     65 
        ;// Scalar weights. BCoeff, CCoeff and DCoeff deliberately reuse r9, r8 and r6, so the
        ;// SMULBB sequence in the function must overwrite EightMinusdy, EightMinusdx and dx
        ;// only after their last use as an input.
     66 ACoeff               RN 12     ;// (8 - dx) * (8 - dy)
     67 BCoeff               RN 9      ;// dx * (8 - dy)        (overwrites EightMinusdy)
     68 CCoeff               RN 8      ;// (8 - dx) * dy        (overwrites EightMinusdx)
     69 DCoeff               RN 6      ;// dx * dy              (overwrites dx)
     70 
     71 pTable               RN 11     ;// address of the selected width branch table
     72 
     73 Step1                RN 10     ;// constant 1: post-increment that moves pSrc one pixel to the right
     74 SrcStepMinus1        RN 14     ;// iSrcStep - 1: post-increment that completes the move to the next row (uses r14/lr)
     75 
        ;// Weights replicated into every 8-bit lane by VDUP.
     76 dACoeff              DN D12.U8
     77 dBCoeff              DN D13.U8
     78 dCCoeff              DN D14.U8
     79 dDCoeff              DN D15.U8
     80 
        ;// Source rows: suffix "a" holds the 8 bytes starting at column 0 of the row,
        ;// suffix "b" the 8 bytes starting at column 1 (loaded from pSrc + 1).
     81 dRow0a               DN D0.U8
     82 dRow0b               DN D1.U8
     83 dRow1a               DN D2.U8
     84 dRow1b               DN D3.U8
     85 
        ;// 16-bit accumulators for output rows 0 and 1 (Q2 = D4:D5, Q3 = D6:D7).
     86 qRow0a               QN Q2.S16
     87 qRow0b               QN Q3.S16
     88 
     89 ;//dIndex               DN    D16.U8
        ;// 16-bit accumulators for output rows 2 and 3 (width-8 loop only).
     90 qRow1a               QN Q11.S16
     91 qRow1b               QN Q12.S16
     92 
        ;// Source rows 2 and 3 (width-8 loop only).
     93 dRow2a               DN D16.U8
     94 dRow2b               DN D17.U8
     95 dRow3a               DN D18.U8
     96 dRow3b               DN D19.U8
     97 
        ;// qOutRowN are U16 views of the accumulators above (Q11 = qRow1a, Q12 = qRow1b);
        ;// dOutRowN receive the narrowed 8-bit results.
     98 qOutRow2             QN Q11.U16
     99 qOutRow3             QN Q12.U16
    100 dOutRow2             DN D20.U8
    101 dOutRow3             DN D21.U8
    102 dOutRow2U64          DN D20.U64
    103 dOutRow3U64          DN D21.U64
    104 
        ;// U16 views of qRow0a (Q2) and qRow0b (Q3), narrowed into D8 / D9.
    105 qOutRow0             QN Q2.U16
    106 qOutRow1             QN Q3.U16
    107 dOutRow0             DN D8.U8
    108 dOutRow1             DN D9.U8
    109 
        ;// Same D8 / D9 registers viewed as 64-, 32- and 16-bit elements, used to store
        ;// 8, 4 or 2 result bytes per row.
    110 dOutRow0U64          DN D8.U64
    111 dOutRow1U64          DN D9.U64
    112 
    113 dOutRow0U32          DN D8.U32
    114 dOutRow1U32          DN D9.U32
    115 
    116 dOutRow0U16          DN D8.U16
    117 dOutRow1U16          DN D9.U16
    118 
    119 
        ;// Views of D0 / D1 (the same registers as dRow0a / dRow0b) used by the zero-motion-vector
        ;// copy loops to store the loaded source bytes directly.
    120 dOut0U64             DN D0.U64
    121 dOut1U64             DN D1.U64
    122 
    123 dOut00U32            DN D0.U32
    124 dOut01U32            DN D1.U32
    125 dOut10U32            DN D2.U32     ;// not referenced by the code in this file
    126 dOut11U32            DN D3.U32     ;// not referenced by the code in this file
    127 
    128 dOut0U16             DN D0.U16
    129 dOut1U16             DN D1.U16
    130 
    131 ;//-----------------------------------------------------------------------------------------------
    132 ;// armVCM4P10_Interpolate_Chroma starts
    133 ;//-----------------------------------------------------------------------------------------------
    134 
    135         ;// Write function header
                ;//
                ;// armVCM4P10_Interpolate_Chroma
                ;//
                ;// Produces an iWidth x iHeight block at pDst from the source at pSrc.
                ;// With s[r][c] the source byte at row r, column c, each output byte is
                ;//     (A*s[r][c] + B*s[r][c+1] + C*s[r+1][c] + D*s[r+1][c+1] + 32) >> 6
                ;// saturated to 8 bits, where
                ;//     A = (8-dx)*(8-dy), B = dx*(8-dy), C = (8-dx)*dy, D = dx*dy.
                ;// When dx + dy == 0 the rows are copied without arithmetic.
                ;//
                ;// In:   r0 = pSrc, r1 = iSrcStep, r2 = pDst, r3 = iDstStep
                ;//       stack: Width, Height, Dx, Dy (4 bytes each)
                ;// Out:  r0 = OMX_Sts_NoErr on every path
                ;//
                ;// The width dispatch only has useful table entries for iWidth = 2, 4 and 8.
                ;// The loops count iHeight down by 4 (width 8, non-zero mv) or by 2 (all other
                ;// loops) and repeat while the result is > 0.
                ;// NOTE(review): presumably callers only pass heights that are multiples of
                ;// those amounts; otherwise extra rows are written. Confirm against the caller.
                ;//
                ;// Every source load reads 8 bytes regardless of iWidth, and the "b" loads start
                ;// one byte to the right, so bytes beyond the block width are read.
                ;//
                ;// Registers written here: r4-r12, r14, D0-D9 and D12-D25 (r0 and r2 are advanced).
                ;// NOTE(review): "r11, d15" presumably tells M_START (armCOMM_s.h) the highest
                ;// core / NEON registers to preserve; the macro is not visible in this file.
    136         M_START armVCM4P10_Interpolate_Chroma, r11, d15
    137 
    138         ;// Define stack arguments
    139         M_ARG   Width,      4
    140         M_ARG   Height,     4
    141         M_ARG   Dx,         4
    142         M_ARG   Dy,         4
    143 
    144         ;// Load argument from the stack
    145         ;// M_STALL ARM1136JS=4
    146 
    147         M_LDRD   dx, dy, Dx                     ;// r6 = Dx, r7 = Dy (presumably one doubleword load; macro in armCOMM_s.h)
    148         M_LDRD   iWidth, iHeight, Width         ;// r4 = Width, r5 = Height
    149 
    150         ;// EightMinusdx = 8 - dx
    151         ;// EightMinusdy = 8 - dy
    152 
    153         ;// ACoeff = EightMinusdx * EightMinusdy
    154         ;// BCoeff = dx * EightMinusdy
    155         ;// CCoeff = EightMinusdx * dy
    156         ;// DCoeff = dx * dy
    157 
    158         RSB     EightMinusdx, dx, #8
    159         RSB     EightMinusdy, dy, #8
    160         CMN     dx,dy                           ;// flags from dx + dy: EQ only when the sum is zero
    161         MOV     Step1, #1
    162         LDREQ   pTable, =armVCM4P10_WidthBranchTableMVIsZero
    163         SUB     SrcStepMinus1, iSrcStep, Step1
    164         LDRNE   pTable, =armVCM4P10_WidthBranchTableMVIsNotZero
    165 
                ;// Preload row 0 for every path: columns 0..7, then columns 1..8. The two
                ;// post-increments (1, then iSrcStep - 1) leave pSrc at the start of row 1.
    166         VLD1    dRow0a, [pSrc], Step1                   ;// 0a
    167 
                ;// Order matters: BCoeff, CCoeff and DCoeff share registers with EightMinusdy,
                ;// EightMinusdx and dx, so each product overwrites an input only at its last use.
    168         SMULBB  ACoeff, EightMinusdx, EightMinusdy
    169         SMULBB  BCoeff, dx, EightMinusdy                ;// r9: EightMinusdy is gone after this
    170         VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b
    171         SMULBB  CCoeff, EightMinusdx, dy                ;// r8: EightMinusdx is gone after this
    172         SMULBB  DCoeff, dx, dy                          ;// r6: dx is gone after this
    173 
                ;// Replicate each weight into all eight byte lanes.
    174         VDUP    dACoeff, ACoeff
    175         VDUP    dBCoeff, BCoeff
    176         VDUP    dCCoeff, CCoeff
    177         VDUP    dDCoeff, DCoeff
    178 
    179         LDR     pc, [pTable, iWidth, LSL #1]      ;// Branch to the case based on iWidth (table entry iWidth/2)
    180 
    181 ;// Pixel layout:
    182 ;//
    183 ;//   x00 x01 x02
    184 ;//   x10 x11 x12
    185 ;//   x20 x21 x22
    186 
    187 ;// If fractional mv is not (0, 0)
        ;//
        ;// Width 8: four output rows per iteration, loads interleaved with the multiplies.
        ;// On entry dRow0a/dRow0b hold the current top row and pSrc points at the row below it.
        ;//   qRow0a = A*0a + B*0b + C*1a + D*1b   -> output row 0
        ;//   qRow0b = A*1a + B*1b + C*2a + D*2b   -> output row 1
        ;//   qRow1a = A*2a + B*2b + C*3a + D*3b   -> output row 2
        ;//   qRow1b = A*3a + B*3b + C*4a + D*4b   -> output row 3
        ;// The fifth row is loaded into dRow0a/dRow0b, where it also serves as the top row
        ;// of the next iteration.
    188 WidthIs8MVIsNotZero
    189 
    190                 VLD1   dRow1a, [pSrc], Step1            ;// 1a: row 1, columns 0..7
    191                 VMULL  qRow0a, dRow0a, dACoeff
    192                 VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// 1b: row 1, columns 1..8; pSrc -> row 2
    193                 VMULL  qRow0b, dRow1a, dACoeff
    194                 VLD1   dRow2a, [pSrc], Step1            ;// 2a
    195                 VMLAL  qRow0a, dRow0b, dBCoeff
    196                 VLD1   dRow2b, [pSrc], SrcStepMinus1    ;// 2b
    197                 VMULL  qRow1a, dRow2a, dACoeff
    198                 VMLAL  qRow0b, dRow1b, dBCoeff
    199                 VLD1   dRow3a, [pSrc], Step1            ;// 3a
    200                 VMLAL  qRow0a, dRow1a, dCCoeff
    201                 VMLAL  qRow1a, dRow2b, dBCoeff
    202                 VMULL  qRow1b, dRow3a, dACoeff
    203                 VLD1   dRow3b, [pSrc], SrcStepMinus1    ;// 3b
    204                 VMLAL  qRow0b, dRow2a, dCCoeff
    205                 VLD1   dRow0a, [pSrc], Step1            ;// 0a: fifth row, reused as row 0 of the next iteration
    206                 VMLAL  qRow1b, dRow3b, dBCoeff
    207                 VMLAL  qRow1a, dRow3a, dCCoeff
    208                 VMLAL  qRow0a, dRow1b, dDCoeff
    209                 VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// 0b
    210                 VMLAL  qRow1b, dRow0a, dCCoeff
    211                 VMLAL  qRow0b, dRow2b, dDCoeff
    212                 VMLAL  qRow1a, dRow3b, dDCoeff
    213 
    214 
    215                 SUBS   iHeight, iHeight, #4             ;// sets the flags tested by BGT below; the NEON instructions in between leave them unchanged
    216                 VMLAL  qRow1b, dRow0b, dDCoeff
    217 
                        ;// Rounding, saturating narrow: (acc + 32) >> 6 into 8-bit lanes.
                        ;// qOutRowN name the same Q registers as the accumulators above.
    218                 VQRSHRN dOutRow0, qOutRow0, #6
    219                 VQRSHRN dOutRow1, qOutRow1, #6
    220                 VQRSHRN dOutRow2, qOutRow2, #6
    221                 VST1   dOutRow0U64, [pDst], iDstStep    ;// 8 bytes per row
    222                 VQRSHRN dOutRow3, qOutRow3, #6
    223 
    224                 VST1   dOutRow1U64, [pDst], iDstStep
    225                 VST1   dOutRow2U64, [pDst], iDstStep
    226                 VST1   dOutRow3U64, [pDst], iDstStep
    227 
    228 
    229                 BGT     WidthIs8MVIsNotZero
    230                 MOV     return,  #OMX_Sts_NoErr
    231                 M_EXIT
    232 
        ;// Width 4: two output rows per iteration; same arithmetic on all eight lanes,
        ;// but only lane 0 of the 32-bit view (4 bytes) of each result is stored.
        ;//   qRow0a = A*0a + B*0b + C*1a + D*1b   -> output row 0
        ;//   qRow0b = A*1a + B*1b + C*2a + D*2b   -> output row 1 (row 2 is loaded into dRow0a/dRow0b)
    233 WidthIs4MVIsNotZero
    234 
    235                 VLD1   dRow1a, [pSrc], Step1            ;// row 1, columns 0..7
    236                 VMULL  qRow0a, dRow0a, dACoeff
    237                 VMULL  qRow0b, dRow1a, dACoeff
    238                 VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// row 1, columns 1..8; pSrc -> row 2
    239                 VMLAL  qRow0a, dRow0b, dBCoeff
    240                 VMLAL  qRow0b, dRow1b, dBCoeff
    241                 VLD1   dRow0a, [pSrc], Step1            ;// row 2, also row 0 of the next iteration
    242                 VMLAL  qRow0a, dRow1a, dCCoeff
    243                 VMLAL  qRow0b, dRow0a, dCCoeff
    244                 VLD1   dRow0b, [pSrc], SrcStepMinus1
    245                 SUBS   iHeight, iHeight, #2             ;// flags consumed by BGT below
    246                 VMLAL  qRow0b, dRow0b, dDCoeff
    247                 VMLAL  qRow0a, dRow1b, dDCoeff
    248 
    249                 VQRSHRN dOutRow1, qOutRow1, #6
    250                 VQRSHRN dOutRow0, qOutRow0, #6
    251 
    252                 VST1   dOutRow0U32[0], [pDst], iDstStep ;// 4 bytes per row
    253                 VST1   dOutRow1U32[0], [pDst], iDstStep
    254 
    255                 BGT     WidthIs4MVIsNotZero
    256                 MOV     return,  #OMX_Sts_NoErr
    257                 M_EXIT
    258 
        ;// Width 2: identical to the width-4 loop except that lane 0 of the 16-bit view
        ;// (2 bytes) of each result is stored.
    259 WidthIs2MVIsNotZero
    260 
    261                 VLD1   dRow1a, [pSrc], Step1            ;// row 1, columns 0..7
    262                 VMULL  qRow0a, dRow0a, dACoeff
    263                 VMULL  qRow0b, dRow1a, dACoeff
    264                 VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// row 1, columns 1..8; pSrc -> row 2
    265                 VMLAL  qRow0a, dRow0b, dBCoeff
    266                 VMLAL  qRow0b, dRow1b, dBCoeff
    267                 VLD1   dRow0a, [pSrc], Step1            ;// row 2, also row 0 of the next iteration
    268                 VMLAL  qRow0a, dRow1a, dCCoeff
    269                 VMLAL  qRow0b, dRow0a, dCCoeff
    270                 VLD1   dRow0b, [pSrc], SrcStepMinus1
    271                 SUBS   iHeight, iHeight, #2             ;// flags consumed by BGT below
    272                 VMLAL  qRow0b, dRow0b, dDCoeff
    273                 VMLAL  qRow0a, dRow1b, dDCoeff
    274 
    275                 VQRSHRN dOutRow1, qOutRow1, #6
    276                 VQRSHRN dOutRow0, qOutRow0, #6
    277 
    278                 VST1   dOutRow0U16[0], [pDst], iDstStep ;// 2 bytes per row
    279                 VST1   dOutRow1U16[0], [pDst], iDstStep
    280 
    281                 BGT     WidthIs2MVIsNotZero
    282                 MOV     return,  #OMX_Sts_NoErr
    283                 M_EXIT
    284 
    285 ;// If fractional mv is (0, 0)
        ;//
        ;// Width 8 copy: the common setup already advanced pSrc to row 1, so step back one
        ;// row and reload from row 0; each iteration then copies two full 8-byte rows.
    286 WidthIs8MVIsZero
    287                 SUB     pSrc, pSrc, iSrcStep
    288 
    289 WidthIs8LoopMVIsZero
    290                 VLD1    dRow0a, [pSrc], iSrcStep
    291                 SUBS    iHeight, iHeight, #2
    292                 VLD1    dRow0b, [pSrc], iSrcStep
    293                 VST1    dOut0U64, [pDst], iDstStep      ;// D0 = dRow0a
    294                 VST1    dOut1U64, [pDst], iDstStep      ;// D1 = dRow0b
    295                 BGT     WidthIs8LoopMVIsZero
    296 
    297                 MOV     return,  #OMX_Sts_NoErr
    298                 M_EXIT
    299 
        ;// Width 4 copy: D0 (dRow0a) already holds the current row from the common setup or
        ;// from the previous iteration. Each iteration loads the next row into D1, stores
        ;// 4 bytes of D0, loads the row after that into D0, then stores 4 bytes of D1.
        ;// NOTE(review): the final iteration therefore loads one row (source row iHeight)
        ;// that is never stored.
    300 WidthIs4MVIsZero
    301                 VLD1    dRow0b, [pSrc], iSrcStep
    302 
    303                 SUBS    iHeight, iHeight, #2
    304 
    305                 VST1    dOut00U32[0], [pDst], iDstStep
    306                 VLD1    dRow0a, [pSrc], iSrcStep        ;// current row for the next iteration
    307                 VST1    dOut01U32[0], [pDst], iDstStep
    308 
    309                 BGT     WidthIs4MVIsZero
    310                 MOV     return,  #OMX_Sts_NoErr
    311                 M_EXIT
    312 
        ;// Width 2 copy: same scheme as the width-4 copy, storing 2 bytes per row.
    313 WidthIs2MVIsZero
    314                 VLD1    dRow0b, [pSrc], iSrcStep
    315                 SUBS    iHeight, iHeight, #2
    316 
    317                 VST1    dOut0U16[0], [pDst], iDstStep
    318                 VLD1    dRow0a, [pSrc], iSrcStep        ;// current row for the next iteration
    319                 VST1    dOut1U16[0], [pDst], iDstStep
    320 
    321                 BGT     WidthIs2MVIsZero
    322                 MOV     return,  #OMX_Sts_NoErr
    323                 M_END
    324 
    325         ENDIF ;// CortexA8
    326 
    327         END
    328 
    329 ;//-----------------------------------------------------------------------------------------------
    330 ;// armVCM4P10_Interpolate_Chroma ends
    331 ;//-----------------------------------------------------------------------------------------------
    332 
    333