Home | History | Annotate | Download | only in arm_neon_asm
      1 ;
      2 ; Copyright (C) 2009 The Android Open Source Project
      3 ;
      4 ; Licensed under the Apache License, Version 2.0 (the "License");
      5 ; you may not use this file except in compliance with the License.
      6 ; You may obtain a copy of the License at
      7 ;
      8 ;      http://www.apache.org/licenses/LICENSE-2.0
      9 ;
     10 ; Unless required by applicable law or agreed to in writing, software
     11 ; distributed under the License is distributed on an "AS IS" BASIS,
     12 ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ; See the License for the specific language governing permissions and
     14 ; limitations under the License.
     15 ;
     16 
     17     REQUIRE8                        ; this file requires an 8-byte aligned stack
     18     PRESERVE8                       ; this file preserves 8-byte stack alignment
     19 
     20     AREA    |.text|, CODE
     21 
     22     EXPORT h264bsdWriteMacroblock
     23 
     24 ; Core register aliases: image/data are the incoming arguments, the rest are filled in by h264bsdWriteMacroblock
     25 image   RN  0                       ; r0: base address for the LDRs at function entry
     26 data    RN  1                       ; r1: source pointer, post-incremented by every VLD1
     27 width   RN  2                       ; r2: loaded from [image, #4], then scaled by 16 and used as luma store step
     28 luma    RN  3                       ; r3: loaded from [image, #0xC], luma destination pointer
     29 cb      RN  4                       ; r4: loaded from [image, #0x10], cb destination pointer
     30 cr      RN  5                       ; r5: loaded from [image, #0x14], cr destination pointer
     31 cwidth  RN  6                       ; r6: scaled width shifted right by 1, used as chroma store step
     32 
     33 ; Quadword aliases typed as unsigned bytes; each one carries a 16-byte luma row (q0-q7 are later reused for chroma)
     34 ; -- NEON registers --
     35 qRow0   QN  Q0.U8
     36 qRow1   QN  Q1.U8
     37 qRow2   QN  Q2.U8
     38 qRow3   QN  Q3.U8
     39 qRow4   QN  Q4.U8
     40 qRow5   QN  Q5.U8
     41 qRow6   QN  Q6.U8
     42 qRow7   QN  Q7.U8
     43 qRow8   QN  Q8.U8
     44 qRow9   QN  Q9.U8
     45 qRow10  QN  Q10.U8
     46 qRow11  QN  Q11.U8
     47 qRow12  QN  Q12.U8
     48 qRow13  QN  Q13.U8
     49 qRow14  QN  Q14.U8
     50 qRow15  QN  Q15.U8
     51 ; Doubleword views of q0-q7 (D(2n), D(2n+1) are the low/high halves of Qn); each carries an 8-byte chroma row
     52 dRow0   DN  D0.U8
     53 dRow1   DN  D1.U8
     54 dRow2   DN  D2.U8
     55 dRow3   DN  D3.U8
     56 dRow4   DN  D4.U8
     57 dRow5   DN  D5.U8
     58 dRow6   DN  D6.U8
     59 dRow7   DN  D7.U8
     60 dRow8   DN  D8.U8
     61 dRow9   DN  D9.U8
     62 dRow10  DN  D10.U8
     63 dRow11  DN  D11.U8
     64 dRow12  DN  D12.U8
     65 dRow13  DN  D13.U8
     66 dRow14  DN  D14.U8
     67 dRow15  DN  D15.U8
     68 
     69 ;/*------------------------------------------------------------------------------
     70 ;
     71 ;    Function: h264bsdWriteMacroblock
     72 ;
     73 ;        Functional description:
     74 ;            Write one macroblock into the image. Both luma and chroma
     75 ;            components will be written at the same time.
     76 ;
     77 ;        Inputs:
     78 ;            data    pointer to macroblock data to be written, 256 values for
     79 ;                    luma followed by 64 values for each chroma component (cb, then cr)
     80 ;
     81 ;        Outputs:
     82 ;            image   pointer to the image where the macroblock will be written
     83 ;
     84 ;        Returns:
     85 ;            none
     86 ;
     87 ;------------------------------------------------------------------------------*/
     88 
     89 h264bsdWriteMacroblock              ; entry: r0 = image, r1 = data
     90     PUSH    {r4-r6,lr}              ; r4-r6 are used as cb/cr/cwidth; lr is popped into pc on exit
     91     VPUSH   {q4-q7}                 ; q4-q7 (d8-d15) are overwritten below, so save them for the caller
     92 
     93     LDR     width, [image, #4]      ; NOTE(review): presumably width in macroblocks (scaled by 16 below) - confirm
     94     LDR     luma, [image, #0xC]     ; luma destination pointer
     95     LDR     cb, [image, #0x10]      ; cb destination pointer
     96     LDR     cr, [image, #0x14]      ; cr destination pointer
     97 
     98 
     99 ;   Write luma: 16 rows of 16 bytes; loads (two rows per VLD1) are interleaved with the row stores
    100     VLD1    {qRow0, qRow1}, [data]! ; luma rows 0,1; data += 32
    101     LSL     width, width, #4        ; width *= 16: byte step between luma rows
    102     VLD1    {qRow2, qRow3}, [data]! ; luma rows 2,3
    103     LSR     cwidth, width, #1       ; cwidth = width / 2: byte step between chroma rows
    104     VST1    {qRow0}, [luma@128], width  ; luma row 0 (@128: address must be 16-byte aligned); luma += width
    105     VLD1    {qRow4, qRow5}, [data]! ; luma rows 4,5
    106     VST1    {qRow1}, [luma@128], width  ; luma row 1
    107     VLD1    {qRow6, qRow7}, [data]! ; luma rows 6,7
    108     VST1    {qRow2}, [luma@128], width  ; luma row 2
    109     VLD1    {qRow8, qRow9}, [data]! ; luma rows 8,9
    110     VST1    {qRow3}, [luma@128], width  ; luma row 3
    111     VLD1    {qRow10, qRow11}, [data]!   ; luma rows 10,11
    112     VST1    {qRow4}, [luma@128], width  ; luma row 4
    113     VLD1    {qRow12, qRow13}, [data]!   ; luma rows 12,13
    114     VST1    {qRow5}, [luma@128], width  ; luma row 5
    115     VLD1    {qRow14, qRow15}, [data]!   ; luma rows 14,15
    116     VST1    {qRow6}, [luma@128], width  ; luma row 6
    117 ;   All luma is loaded; q0-q7 are refilled with chroma, each pair only after its luma rows were stored
    118     VLD1    {qRow0, qRow1}, [data]! ; cb rows 0,1,2,3 -> d0-d3
    119     VST1    {qRow7}, [luma@128], width  ; luma row 7
    120     VLD1    {qRow2, qRow3}, [data]! ; cb rows 4,5,6,7 -> d4-d7
    121     VST1    {qRow8}, [luma@128], width  ; luma row 8
    122     VLD1    {qRow4, qRow5}, [data]! ; cr rows 0,1,2,3 -> d8-d11
    123     VST1    {qRow9}, [luma@128], width  ; luma row 9
    124     VLD1    {qRow6, qRow7}, [data]! ; cr rows 4,5,6,7 -> d12-d15
    125     VST1    {qRow10}, [luma@128], width ; luma row 10
    126     VST1    {dRow0}, [cb@64], cwidth    ; cb row 0 (@64: address must be 8-byte aligned); cb += cwidth
    127     VST1    {dRow8}, [cr@64], cwidth    ; cr row 0; cr += cwidth
    128     VST1    {qRow11}, [luma@128], width ; luma row 11
    129     VST1    {dRow1}, [cb@64], cwidth    ; cb row 1
    130     VST1    {dRow9}, [cr@64], cwidth    ; cr row 1
    131     VST1    {qRow12}, [luma@128], width ; luma row 12
    132     VST1    {dRow2}, [cb@64], cwidth    ; cb row 2
    133     VST1    {dRow10}, [cr@64], cwidth   ; cr row 2
    134     VST1    {qRow13}, [luma@128], width ; luma row 13
    135     VST1    {dRow3}, [cb@64], cwidth    ; cb row 3
    136     VST1    {dRow11}, [cr@64], cwidth   ; cr row 3
    137     VST1    {qRow14}, [luma@128], width ; luma row 14
    138     VST1    {dRow4}, [cb@64], cwidth    ; cb row 4
    139     VST1    {dRow12}, [cr@64], cwidth   ; cr row 4
    140     VST1    {qRow15}, [luma]            ; luma row 15 (last): no alignment qualifier, no post-increment
    141     VST1    {dRow5}, [cb@64], cwidth    ; cb row 5
    142     VST1    {dRow13}, [cr@64], cwidth   ; cr row 5
    143     VST1    {dRow6}, [cb@64], cwidth    ; cb row 6
    144     VST1    {dRow14}, [cr@64], cwidth   ; cr row 6
    145     VST1    {dRow7}, [cb@64]            ; cb row 7 (last): no post-increment
    146     VST1    {dRow15}, [cr@64]           ; cr row 7 (last): no post-increment
    147 
    148     VPOP    {q4-q7}                 ; restore the NEON registers saved on entry
    149     POP     {r4-r6,pc}              ; restore r4-r6 and return by loading the saved lr into pc
    150     END
    151 
    152 
    153