Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 
     14         INCLUDE omxtypes_s.h
     15         INCLUDE armCOMM_s.h
     16 
     17         M_VARIANTS CortexA8
     18 
     19 
     20     IF CortexA8
     21 
     22     M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero
            ;// Jump table chosen when dx + dy != 0 (the LDRNE ahead of the dispatch).
            ;// The dispatch is "LDR pc, [pTable, iWidth, LSL #1]": the byte offset is
            ;// iWidth * 2, so iWidth = 2, 4, 8 picks word 1, 2, 4 of the table.
            ;// Words 0 and 3 are fillers that keep those offsets lined up.
            ;// NOTE(review): M_TABLE is presumably a macro from armCOMM_s.h that opens a
            ;// named data table -- confirm there.
     23 
     24     DCD   WidthIs2MVIsNotZero, WidthIs2MVIsNotZero      ;// words 0, 1 (byte offsets 0, 4)
     25     DCD   WidthIs4MVIsNotZero, WidthIs4MVIsNotZero      ;// words 2, 3 (byte offsets 8, 12)
     26     DCD   WidthIs8MVIsNotZero                           ;// word 4     (byte offset 16)
     27 
     28     M_TABLE armVCM4P10_WidthBranchTableMVIsZero
            ;// Same layout, chosen when dx + dy == 0 (the LDREQ ahead of the dispatch).
     29 
     30     DCD   WidthIs2MVIsZero, WidthIs2MVIsZero            ;// words 0, 1 (byte offsets 0, 4)
     31     DCD   WidthIs4MVIsZero, WidthIs4MVIsZero            ;// words 2, 3 (byte offsets 8, 12)
     32     DCD   WidthIs8MVIsZero                              ;// word 4     (byte offset 16)
     33 
     34 
     35 ;// input registers
        ;// r0-r3 are used as passed in; r4-r7 are filled from the stack arguments
        ;// by the two M_LDRD loads at the top of the function.
     36 
     37 pSrc                 RN 0       ;// source pointer, post-incremented by every load
     38 iSrcStep             RN 1       ;// byte distance between source rows
     39 pDst                 RN 2       ;// destination pointer, post-incremented by every store
     40 iDstStep             RN 3       ;// byte distance between destination rows
     41 iWidth               RN 4       ;// block width, used only to index the branch table
     42 iHeight              RN 5       ;// rows still to produce (loop counter)
     43 dx                   RN 6       ;// horizontal fractional offset
     44 dy                   RN 7       ;// vertical fractional offset
     45 
     46 ;// local variable registers
        ;// Several aliases below deliberately share a physical register; the
        ;// instruction order in the function body depends on that sharing.
     47 pc                   RN 15
     48 return               RN 0       ;// same register as pSrc; written only just before exit
     49 EightMinusdx         RN 8       ;// 8 - dx
     50 EightMinusdy         RN 9       ;// 8 - dy
     51 
     52 ACoeff               RN 12      ;// (8 - dx) * (8 - dy)
     53 BCoeff               RN 9       ;// dx * (8 - dy)        -- overwrites EightMinusdy
     54 CCoeff               RN 8       ;// (8 - dx) * dy        -- overwrites EightMinusdx
     55 DCoeff               RN 6       ;// dx * dy              -- overwrites dx
     56 
     57 pTable               RN 11      ;// address of the selected branch table
     58 
     59 Step1                RN 10      ;// constant 1, used as a post-increment register
     60 SrcStepMinus1        RN 14      ;// iSrcStep - 1; r14 is the link register used as scratch
        ;// NOTE(review): using r14 assumes M_START/M_EXIT save and restore lr -- confirm.
     61 
        ;// Filter weights, one copy per U8 lane (filled by VDUP in the function body).
     62 dACoeff              DN D12.U8
     63 dBCoeff              DN D13.U8
     64 dCCoeff              DN D14.U8
     65 dDCoeff              DN D15.U8
     66 
        ;// Source rows: "a" is loaded at pSrc, "b" one byte further on.
     67 dRow0a               DN D0.U8
     68 dRow0b               DN D1.U8
     69 dRow1a               DN D2.U8
     70 dRow1b               DN D3.U8
     71 
        ;// Accumulators for the first and second output row of a pass.
     72 qRow0a               QN Q2.S16
     73 qRow0b               QN Q3.S16
     74 
     75 ;//dIndex               DN    D16.U8
        ;// Accumulators for the third and fourth output row (8-wide path only).
     76 qRow1a               QN Q11.S16
     77 qRow1b               QN Q12.S16
     78 
     79 dRow2a               DN D16.U8
     80 dRow2b               DN D17.U8
     81 dRow3a               DN D18.U8
     82 dRow3b               DN D19.U8
     83 
        ;// U16 views of qRow1a / qRow1b (same Q11 / Q12) used as narrowing sources,
        ;// and the D registers that receive the narrowed third and fourth rows.
     84 qOutRow2             QN Q11.U16
     85 qOutRow3             QN Q12.U16
     86 dOutRow2             DN D20.U8
     87 dOutRow3             DN D21.U8
     88 dOutRow2U64          DN D20.U64
     89 dOutRow3U64          DN D21.U64
     90 
        ;// U16 views of qRow0a / qRow0b (same Q2 / Q3) and their narrowed results.
     91 qOutRow0             QN Q2.U16
     92 qOutRow1             QN Q3.U16
     93 dOutRow0             DN D8.U8
     94 dOutRow1             DN D9.U8
     95 
        ;// D8 / D9 again, typed for the 8-, 4- and 2-byte stores of the
        ;// width 8, 4 and 2 paths.
     96 dOutRow0U64          DN D8.U64
     97 dOutRow1U64          DN D9.U64
     98 
     99 dOutRow0U32          DN D8.U32
    100 dOutRow1U32          DN D9.U32
    101 
    102 dOutRow0U16          DN D8.U16
    103 dOutRow1U16          DN D9.U16
    104 
    105 
        ;// Store views of D0 / D1 (= dRow0a / dRow0b) for the zero-MV copy paths,
        ;// which write loaded rows back out unmodified.
    106 dOut0U64             DN D0.U64
    107 dOut1U64             DN D1.U64
    108 
    109 dOut00U32            DN D0.U32
    110 dOut01U32            DN D1.U32
    111 dOut10U32            DN D2.U32  ;// not referenced by the code visible in this file
    112 dOut11U32            DN D3.U32  ;// not referenced by the code visible in this file
    113 
    114 dOut0U16             DN D0.U16
    115 dOut1U16             DN D1.U16
    116 
    117 ;//-----------------------------------------------------------------------------------------------
    118 ;// armVCM4P10_Interpolate_Chroma_asm starts
    119 ;//-----------------------------------------------------------------------------------------------
    120 
    121         ;// Write function header
                ;// Entry: pSrc, iSrcStep, pDst, iDstStep are taken from r0-r3 as they are;
                ;// Width, Height, Dx, Dy are read from the stack below.
                ;// This prologue builds the four bilinear weights, preloads source row 0
                ;// and jumps to the loop that matches iWidth and (dx, dy).
                ;// NOTE(review): "r11, d15" presumably names the last callee-saved ARM and
                ;// NEON registers M_START must preserve -- confirm in armCOMM_s.h.
    122         M_START armVCM4P10_Interpolate_Chroma, r11, d15
    123 
    124         ;// Define stack arguments
    125         M_ARG   Width,      4
    126         M_ARG   Height,     4
    127         M_ARG   Dx,         4
    128         M_ARG   Dy,         4
    129 
    130         ;// Load argument from the stack
    131         ;// M_STALL ARM1136JS=4
    132 
    133         M_LDRD   dx, dy, Dx                             ;// r6 = Dx, r7 = Dy
    134         M_LDRD   iWidth, iHeight, Width                 ;// r4 = Width, r5 = Height
    135 
    136         ;// EightMinusdx = 8 - dx
    137         ;// EightMinusdy = 8 - dy
    138 
    139         ;// ACoeff = EightMinusdx * EightMinusdy
    140         ;// BCoeff = dx * EightMinusdy
    141         ;// CCoeff = EightMinusdx * dy
    142         ;// DCoeff = dx * dy
                ;// The four products always add up to 64, which is why every filtered
                ;// path narrows its sums with a right shift by 6.
    143 
    144         RSB     EightMinusdx, dx, #8
    145         RSB     EightMinusdy, dy, #8
    146         CMN     dx,dy                                   ;// flags from dx + dy; Z set when the sum is 0
                ;// (Z means dx == dy == 0 assuming both offsets are non-negative --
                ;// TODO confirm the caller's range.) MOV and SUB below carry no S suffix,
                ;// so the LDREQ / LDRNE pair still sees the CMN result.
    147         MOV     Step1, #1
    148         LDREQ   pTable, =armVCM4P10_WidthBranchTableMVIsZero
    149         SUB     SrcStepMinus1, iSrcStep, Step1          ;// iSrcStep - 1
    150         LDRNE   pTable, =armVCM4P10_WidthBranchTableMVIsNotZero
    151 
    152         VLD1    dRow0a, [pSrc], Step1                   ;// 0a: 8 bytes of row 0 at column 0; pSrc += 1
    153 
                ;// Order matters: ACoeff must be formed first, because BCoeff, CCoeff
                ;// and DCoeff overwrite EightMinusdy (r9), EightMinusdx (r8) and dx (r6).
    154         SMULBB  ACoeff, EightMinusdx, EightMinusdy
    155         SMULBB  BCoeff, dx, EightMinusdy
    156         VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b: row 0 at column 1; pSrc now at row 1
    157         SMULBB  CCoeff, EightMinusdx, dy
    158         SMULBB  DCoeff, dx, dy
    159 
                ;// Copy each weight into every U8 lane of its D register.
    160         VDUP    dACoeff, ACoeff
    161         VDUP    dBCoeff, BCoeff
    162         VDUP    dCCoeff, CCoeff
    163         VDUP    dDCoeff, DCoeff
    164 
                ;// Byte offset into the table is iWidth * 2 (word 1, 2 or 4 for
                ;// iWidth = 2, 4 or 8); the loaded word becomes the new pc.
    165         LDR     pc, [pTable, iWidth, LSL #1]      ;// Branch to the case based on iWidth
    166 
    167 ;// Pixel layout:
    168 ;//
    169 ;//   x00 x01 x02
    170 ;//   x10 x11 x12
    171 ;//   x20 x21 x22
    172 
    173 ;// If fractional mv is not (0, 0)
    174 WidthIs8MVIsNotZero
        ;// 8-pixel-wide filtered path; each pass produces four output rows:
        ;//   out[r][x] = A*s[r][x] + B*s[r][x+1] + C*s[r+1][x] + D*s[r+1][x+1]
        ;// and the 16-bit sums are narrowed by VQRSHRN #6 (rounding shift right by
        ;// 6 with saturation).
        ;// "Na" holds source row N from column x, "Nb" the same row from column x+1.
        ;// Row 0 (dRow0a / dRow0b) is already loaded on entry: by the code before
        ;// the dispatch on the first pass, by the 0a / 0b loads below afterwards.
        ;// qOutRow0..3 are U16 views of qRow0a, qRow0b, qRow1a, qRow1b (Q2, Q3, Q11, Q12).
        ;// Loads and multiply-accumulates are interleaved on purpose (presumably for
        ;// Cortex-A8 issue scheduling); keep the order when editing.
    175 
    176                 VLD1   dRow1a, [pSrc], Step1            ;// 1a
    177                 VMULL  qRow0a, dRow0a, dACoeff          ;// out0  = A * 0a
    178                 VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// 1b, pSrc -> next row
    179                 VMULL  qRow0b, dRow1a, dACoeff          ;// out1  = A * 1a
    180                 VLD1   dRow2a, [pSrc], Step1            ;// 2a
    181                 VMLAL  qRow0a, dRow0b, dBCoeff          ;// out0 += B * 0b
    182                 VLD1   dRow2b, [pSrc], SrcStepMinus1    ;// 2b, pSrc -> next row
    183                 VMULL  qRow1a, dRow2a, dACoeff          ;// out2  = A * 2a
    184                 VMLAL  qRow0b, dRow1b, dBCoeff          ;// out1 += B * 1b
    185                 VLD1   dRow3a, [pSrc], Step1            ;// 3a
    186                 VMLAL  qRow0a, dRow1a, dCCoeff          ;// out0 += C * 1a
    187                 VMLAL  qRow1a, dRow2b, dBCoeff          ;// out2 += B * 2b
    188                 VMULL  qRow1b, dRow3a, dACoeff          ;// out3  = A * 3a
    189                 VLD1   dRow3b, [pSrc], SrcStepMinus1    ;// 3b, pSrc -> next row
    190                 VMLAL  qRow0b, dRow2a, dCCoeff          ;// out1 += C * 2a
    191                 VLD1   dRow0a, [pSrc], Step1            ;// 0a: fifth row, also row 0 of the next pass
    192                 VMLAL  qRow1b, dRow3b, dBCoeff          ;// out3 += B * 3b
    193                 VMLAL  qRow1a, dRow3a, dCCoeff          ;// out2 += C * 3a
    194                 VMLAL  qRow0a, dRow1b, dDCoeff          ;// out0 += D * 1b
    195                 VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// 0b, pSrc -> next row
    196                 VMLAL  qRow1b, dRow0a, dCCoeff          ;// out3 += C * (new) 0a
    197                 VMLAL  qRow0b, dRow2b, dDCoeff          ;// out1 += D * 2b
    198                 VMLAL  qRow1a, dRow3b, dDCoeff          ;// out2 += D * 3b
    199 
    200 
                        ;// Four rows done; the NEON instructions up to BGT do not
                        ;// touch the condition flags set here.
    201                 SUBS   iHeight, iHeight, #4
    202                 VMLAL  qRow1b, dRow0b, dDCoeff          ;// out3 += D * (new) 0b
    203 
    204                 VQRSHRN dOutRow0, qOutRow0, #6
    205                 VQRSHRN dOutRow1, qOutRow1, #6
    206                 VQRSHRN dOutRow2, qOutRow2, #6
    207                 VST1   dOutRow0U64, [pDst], iDstStep    ;// 8 bytes, pDst -> next row
    208                 VQRSHRN dOutRow3, qOutRow3, #6
    209 
    210                 VST1   dOutRow1U64, [pDst], iDstStep
    211                 VST1   dOutRow2U64, [pDst], iDstStep
    212                 VST1   dOutRow3U64, [pDst], iDstStep
    213 
    214 
    215                 BGT     WidthIs8MVIsNotZero             ;// repeat while rows remain
    216                 MOV     return,  #OMX_Sts_NoErr
    217                 M_EXIT
    218 
    219 WidthIs4MVIsNotZero
        ;// 4-pixel-wide filtered path; each pass produces two output rows:
        ;//   out[r][x] = A*s[r][x] + B*s[r][x+1] + C*s[r+1][x] + D*s[r+1][x+1]
        ;// narrowed by VQRSHRN #6. All eight lanes are computed, but only the low
        ;// 32 bits (4 pixels) of each result are stored.
        ;// Row 0 (dRow0a / dRow0b) is already loaded on entry and is reloaded with
        ;// the third row below, which serves as row 0 of the next pass.
        ;// qOutRow0 / qOutRow1 are U16 views of qRow0a / qRow0b.
        ;// NOTE(review): every VLD1 here still fetches 8 bytes per row -- confirm
        ;// the source buffer tolerates reads past the 4-pixel block.
    220 
    221                 VLD1   dRow1a, [pSrc], Step1            ;// 1a
    222                 VMULL  qRow0a, dRow0a, dACoeff          ;// out0  = A * 0a
    223                 VMULL  qRow0b, dRow1a, dACoeff          ;// out1  = A * 1a
    224                 VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// 1b, pSrc -> next row
    225                 VMLAL  qRow0a, dRow0b, dBCoeff          ;// out0 += B * 0b
    226                 VMLAL  qRow0b, dRow1b, dBCoeff          ;// out1 += B * 1b
    227                 VLD1   dRow0a, [pSrc], Step1            ;// third row "a", into the row-0 register
    228                 VMLAL  qRow0a, dRow1a, dCCoeff          ;// out0 += C * 1a
    229                 VMLAL  qRow0b, dRow0a, dCCoeff          ;// out1 += C * (new) 0a
    230                 VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// third row "b", pSrc -> next row
    231                 SUBS   iHeight, iHeight, #2             ;// two rows done; flags survive until BGT
    232                 VMLAL  qRow0b, dRow0b, dDCoeff          ;// out1 += D * (new) 0b
    233                 VMLAL  qRow0a, dRow1b, dDCoeff          ;// out0 += D * 1b
    234 
    235                 VQRSHRN dOutRow1, qOutRow1, #6
    236                 VQRSHRN dOutRow0, qOutRow0, #6
    237 
    238                 VST1   dOutRow0U32[0], [pDst], iDstStep ;// lane 0 only: 4 bytes
    239                 VST1   dOutRow1U32[0], [pDst], iDstStep
    240 
    241                 BGT     WidthIs4MVIsNotZero             ;// repeat while rows remain
    242                 MOV     return,  #OMX_Sts_NoErr
    243                 M_EXIT
    244 
    245 WidthIs2MVIsNotZero
        ;// 2-pixel-wide filtered path; each pass produces two output rows:
        ;//   out[r][x] = A*s[r][x] + B*s[r][x+1] + C*s[r+1][x] + D*s[r+1][x+1]
        ;// narrowed by VQRSHRN #6. The arithmetic is the same as in the 4-wide
        ;// path; only the low 16 bits (2 pixels) of each result are stored.
        ;// Row 0 (dRow0a / dRow0b) is already loaded on entry and is reloaded with
        ;// the third row below, which serves as row 0 of the next pass.
        ;// qOutRow0 / qOutRow1 are U16 views of qRow0a / qRow0b.
        ;// NOTE(review): every VLD1 here still fetches 8 bytes per row -- confirm
        ;// the source buffer tolerates reads past the 2-pixel block.
    246 
    247                 VLD1   dRow1a, [pSrc], Step1            ;// 1a
    248                 VMULL  qRow0a, dRow0a, dACoeff          ;// out0  = A * 0a
    249                 VMULL  qRow0b, dRow1a, dACoeff          ;// out1  = A * 1a
    250                 VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// 1b, pSrc -> next row
    251                 VMLAL  qRow0a, dRow0b, dBCoeff          ;// out0 += B * 0b
    252                 VMLAL  qRow0b, dRow1b, dBCoeff          ;// out1 += B * 1b
    253                 VLD1   dRow0a, [pSrc], Step1            ;// third row "a", into the row-0 register
    254                 VMLAL  qRow0a, dRow1a, dCCoeff          ;// out0 += C * 1a
    255                 VMLAL  qRow0b, dRow0a, dCCoeff          ;// out1 += C * (new) 0a
    256                 VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// third row "b", pSrc -> next row
    257                 SUBS   iHeight, iHeight, #2             ;// two rows done; flags survive until BGT
    258                 VMLAL  qRow0b, dRow0b, dDCoeff          ;// out1 += D * (new) 0b
    259                 VMLAL  qRow0a, dRow1b, dDCoeff          ;// out0 += D * 1b
    260 
    261                 VQRSHRN dOutRow1, qOutRow1, #6
    262                 VQRSHRN dOutRow0, qOutRow0, #6
    263 
    264                 VST1   dOutRow0U16[0], [pDst], iDstStep ;// lane 0 only: 2 bytes
    265                 VST1   dOutRow1U16[0], [pDst], iDstStep
    266 
    267                 BGT     WidthIs2MVIsNotZero             ;// repeat while rows remain
    268                 MOV     return,  #OMX_Sts_NoErr
    269                 M_EXIT
    270 
    271 ;// If fractional mv is (0, 0)
    272 WidthIs8MVIsZero
        ;// 8-pixel-wide copy path (no filtering): two rows are copied per pass.
        ;// The two loads before the dispatch advanced pSrc by Step1 + SrcStepMinus1
        ;// = iSrcStep, so step back one row and reload row 0 inside the loop.
    273                 SUB     pSrc, pSrc, iSrcStep
    274 
    275 WidthIs8LoopMVIsZero
    276                 VLD1    dRow0a, [pSrc], iSrcStep        ;// first row of the pair  -> D0
    277                 SUBS    iHeight, iHeight, #2            ;// flags survive the NEON loads/stores
    278                 VLD1    dRow0b, [pSrc], iSrcStep        ;// second row of the pair -> D1
    279                 VST1    dOut0U64, [pDst], iDstStep      ;// dOut0U64 is D0: store 8 bytes unchanged
    280                 VST1    dOut1U64, [pDst], iDstStep      ;// dOut1U64 is D1
    281                 BGT     WidthIs8LoopMVIsZero            ;// repeat while rows remain
    282 
    283                 MOV     return,  #OMX_Sts_NoErr
    284                 M_EXIT
    285 
    286 WidthIs4MVIsZero
        ;// 4-pixel-wide copy path (no filtering): two rows are copied per pass.
        ;// On entry D0 (dRow0a) already holds the row to store first, loaded before
        ;// the dispatch or by the previous pass, and pSrc points at the row after it.
        ;// Loads run one row ahead of the stores, so the final VLD1 of the last
        ;// pass fetches a row that is never written out.
    287                 VLD1    dRow0b, [pSrc], iSrcStep        ;// next row -> D1
    288 
    289                 SUBS    iHeight, iHeight, #2            ;// flags survive the NEON loads/stores
    290 
    291                 VST1    dOut00U32[0], [pDst], iDstStep  ;// low 4 bytes of D0
    292                 VLD1    dRow0a, [pSrc], iSrcStep        ;// row after that -> D0 (for the next pass)
    293                 VST1    dOut01U32[0], [pDst], iDstStep  ;// low 4 bytes of D1
    294 
    295                 BGT     WidthIs4MVIsZero                ;// repeat while rows remain
    296                 MOV     return,  #OMX_Sts_NoErr
    297                 M_EXIT
    298 
    299 WidthIs2MVIsZero
        ;// 2-pixel-wide copy path (no filtering): two rows are copied per pass.
        ;// On entry D0 (dRow0a) already holds the row to store first, loaded before
        ;// the dispatch or by the previous pass, and pSrc points at the row after it.
        ;// Loads run one row ahead of the stores, so the final VLD1 of the last
        ;// pass fetches a row that is never written out.
    300                 VLD1    dRow0b, [pSrc], iSrcStep        ;// next row -> D1
    301                 SUBS    iHeight, iHeight, #2            ;// flags survive the NEON loads/stores
    302 
    303                 VST1    dOut0U16[0], [pDst], iDstStep   ;// low 2 bytes of D0
    304                 VLD1    dRow0a, [pSrc], iSrcStep        ;// row after that -> D0 (for the next pass)
    305                 VST1    dOut1U16[0], [pDst], iDstStep   ;// low 2 bytes of D1
    306 
    307                 BGT     WidthIs2MVIsZero                ;// repeat while rows remain
    308                 MOV     return,  #OMX_Sts_NoErr
    309                 M_END
    310 
    311         ENDIF ;// CortexA8
    312 
    313         END
    314 
    315 ;//-----------------------------------------------------------------------------------------------
    316 ;// armVCM4P10_Interpolate_Chroma_asm ends
    317 ;//-----------------------------------------------------------------------------------------------
    318 
    319