Home | History | Annotate | Download | only in arm_neon_asm_gcc
      1 @
      2 @ Copyright (C) 2009 The Android Open Source Project
      3 @
      4 @ Licensed under the Apache License, Version 2.0 (the "License");
      5 @ you may not use this file except in compliance with the License.
      6 @ You may obtain a copy of the License at
      7 @
      8 @      http://www.apache.org/licenses/LICENSE-2.0
      9 @
     10 @ Unless required by applicable law or agreed to in writing, software
     11 @ distributed under the License is distributed on an "AS IS" BASIS,
     12 @ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 @ See the License for the specific language governing permissions and
     14 @ limitations under the License.
     15 @
     16 
     17 #include "asm_common.S"
     18 
     19     require8                            @ not defined in this file; presumably a macro from asm_common.S (8-byte stack alignment attribute) - TODO confirm
     20     preserve8                           @ likewise not defined here; presumably from asm_common.S - TODO confirm
     21 
     22     .arm                                @ assemble the following code as ARM (A32) instructions rather than Thumb
     23     .fpu neon                           @ let the assembler accept NEON (Advanced SIMD) instructions
     24     .text                               @ place the following code in the text (code) section
     25 
     26 /* Core (integer) register aliases used by h264bsdWriteMacroblock */
     27 #define image   r0  /* in: image descriptor; the words at offsets 4, 0xC, 0x10 and 0x14 are read from it */
     28 #define data    r1  /* in: macroblock sample data; advanced by the post-incrementing loads */
     29 #define width   r2  /* word loaded from image+4, then multiplied by 16; step between luma rows */
     30 #define luma    r3  /* luma destination pointer, loaded from image+0xC */
     31 #define cb      r4  /* cb destination pointer, loaded from image+0x10 */
     32 #define cr      r5  /* cr destination pointer, loaded from image+0x14 */
     33 #define cwidth  r6  /* scaled width divided by 2; step between chroma rows */
     34 
     35 /* -- NEON register aliases; the .U8 suffix types each register as unsigned 8-bit lanes -- */
     36 
     37 #define qRow0     Q0.U8   /* qRowN = 128-bit register QN (16 bytes) */
     38 #define qRow1     Q1.U8
     39 #define qRow2     Q2.U8
     40 #define qRow3     Q3.U8
     41 #define qRow4     Q4.U8
     42 #define qRow5     Q5.U8
     43 #define qRow6     Q6.U8
     44 #define qRow7     Q7.U8
     45 #define qRow8     Q8.U8
     46 #define qRow9     Q9.U8
     47 #define qRow10    Q10.U8
     48 #define qRow11    Q11.U8
     49 #define qRow12    Q12.U8
     50 #define qRow13    Q13.U8
     51 #define qRow14    Q14.U8
     52 #define qRow15    Q15.U8
     53 
     54 #define dRow0     D0.U8   /* dRowN = 64-bit register DN (8 bytes); D(2n) and D(2n+1) are the two halves of Qn, so dRow0-dRow7 overlap qRow0-qRow3 */
     55 #define dRow1     D1.U8
     56 #define dRow2     D2.U8
     57 #define dRow3     D3.U8
     58 #define dRow4     D4.U8
     59 #define dRow5     D5.U8
     60 #define dRow6     D6.U8
     61 #define dRow7     D7.U8
     62 #define dRow8     D8.U8   /* dRow8-dRow15 overlap qRow4-qRow7 */
     63 #define dRow9     D9.U8
     64 #define dRow10    D10.U8
     65 #define dRow11    D11.U8
     66 #define dRow12    D12.U8
     67 #define dRow13    D13.U8
     68 #define dRow14    D14.U8
     69 #define dRow15    D15.U8
     70 
     71 /*------------------------------------------------------------------------------
     72 
     73     Function: h264bsdWriteMacroblock
     74 
     75         Functional description:
     76             Write one macroblock into the image. Both luma and chroma
     77             components will be written at the same time.
     78 
     79         Inputs:
     80             data    pointer to macroblock data to be written, 256 values for
     81                     luma followed by 64 values for each chroma component (cb, then cr)
     82 
     83         Outputs:
     84             image   pointer to the image where the macroblock will be written
     85 
     86         Returns:
     87             none
     88 
     89 ------------------------------------------------------------------------------*/
     90 
     91 function h264bsdWriteMacroblock, export=1       @ copies 256 luma + 64 cb + 64 cr bytes from data into the image planes; the function macro is not defined in this file (presumably asm_common.S) - TODO confirm what export=1 does
     92     PUSH    {r4-r6,lr}                          @ save r4-r6 (used here as cb, cr, cwidth) and the return address
     93     VPUSH   {q4-q7}                             @ q4-q7 (d8-d15) are overwritten below and are callee-saved under the AAPCS
     94 
     95     LDR     width, [image, #4]                  @ width = word at image+4; presumably the picture width in macroblocks, since it is scaled by 16 below - TODO confirm against the image struct
     96     LDR     luma, [image, #0xC]                 @ luma = word at image+0xC, used as the luma store address
     97     LDR     cb, [image, #0x10]                  @ cb = word at image+0x10, used as the cb store address
     98     LDR     cr, [image, #0x14]                  @ cr = word at image+0x14, used as the cr store address
     99 
    100 
    101 @   Write luma: 16 rows of 16 bytes. Loads run ahead of the stores; the chroma loads and stores are interleaved with the later luma stores.
    102     VLD1    {qRow0, qRow1}, [data]!             @ load luma rows 0,1 (32 bytes); data += 32
    103     LSL     width, width, #4                    @ width *= 16: step from one luma row to the next, used as store post-increment
    104     VLD1    {qRow2, qRow3}, [data]!             @ load luma rows 2,3
    105     LSR     cwidth, width, #1                   @ cwidth = width / 2: step from one chroma row to the next
    106     VST1    {qRow0}, [luma,:128], width         @ store luma row 0; :128 requires a 16-byte aligned address; luma += width
    107     VLD1    {qRow4, qRow5}, [data]!             @ load luma rows 4,5
    108     VST1    {qRow1}, [luma,:128], width         @ store luma row 1
    109     VLD1    {qRow6, qRow7}, [data]!             @ load luma rows 6,7
    110     VST1    {qRow2}, [luma,:128], width         @ store luma row 2
    111     VLD1    {qRow8, qRow9}, [data]!             @ load luma rows 8,9
    112     VST1    {qRow3}, [luma,:128], width         @ store luma row 3
    113     VLD1    {qRow10, qRow11}, [data]!           @ load luma rows 10,11
    114     VST1    {qRow4}, [luma,:128], width         @ store luma row 4
    115     VLD1    {qRow12, qRow13}, [data]!           @ load luma rows 12,13
    116     VST1    {qRow5}, [luma,:128], width         @ store luma row 5
    117     VLD1    {qRow14, qRow15}, [data]!           @ load luma rows 14,15; all 256 luma bytes are now in q0-q15
    118     VST1    {qRow6}, [luma,:128], width         @ store luma row 6; rows 0-6 are written, so q0-q6 can be reused for chroma
    119 
    120     VLD1    {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3
    121     VST1    {qRow7}, [luma,:128], width         @ store luma row 7; q7 is now free for the last chroma load
    122     VLD1    {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7
    123     VST1    {qRow8}, [luma,:128], width         @ store luma row 8
    124     VLD1    {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3
    125     VST1    {qRow9}, [luma,:128], width         @ store luma row 9
    126     VLD1    {qRow6, qRow7}, [data]! ;//cr rows 4,5,6,7
    127     VST1    {qRow10}, [luma,:128], width        @ store luma row 10; from here d0-d7 hold cb and d8-d15 hold cr
    128     VST1    {dRow0}, [cb,:64], cwidth           @ store cb row 0 (8 bytes); :64 requires an 8-byte aligned address; cb += cwidth
    129     VST1    {dRow8}, [cr,:64], cwidth           @ store cr row 0 (8 bytes); cr += cwidth
    130     VST1    {qRow11}, [luma,:128], width        @ store luma row 11
    131     VST1    {dRow1}, [cb,:64], cwidth           @ store cb row 1
    132     VST1    {dRow9}, [cr,:64], cwidth           @ store cr row 1
    133     VST1    {qRow12}, [luma,:128], width        @ store luma row 12
    134     VST1    {dRow2}, [cb,:64], cwidth           @ store cb row 2
    135     VST1    {dRow10}, [cr,:64], cwidth          @ store cr row 2
    136     VST1    {qRow13}, [luma,:128], width        @ store luma row 13
    137     VST1    {dRow3}, [cb,:64], cwidth           @ store cb row 3
    138     VST1    {dRow11}, [cr,:64], cwidth          @ store cr row 3
    139     VST1    {qRow14}, [luma,:128], width        @ store luma row 14
    140     VST1    {dRow4}, [cb,:64], cwidth           @ store cb row 4
    141     VST1    {dRow12}, [cr,:64], cwidth          @ store cr row 4
    142     VST1    {qRow15}, [luma]                    @ store luma row 15 (last): no post-increment; NOTE(review): the only luma store without a :128 alignment hint
    143     VST1    {dRow5}, [cb,:64], cwidth           @ store cb row 5
    144     VST1    {dRow13}, [cr,:64], cwidth          @ store cr row 5
    145     VST1    {dRow6}, [cb,:64], cwidth           @ store cb row 6
    146     VST1    {dRow14}, [cr,:64], cwidth          @ store cr row 6
    147     VST1    {dRow7}, [cb,:64]                   @ store cb row 7 (last): no post-increment
    148     VST1    {dRow15}, [cr,:64]                  @ store cr row 7 (last): no post-increment
    149 
    150     VPOP    {q4-q7}                             @ restore the callee-saved NEON registers
    151     POP     {r4-r6,pc}                          @ restore r4-r6 and return by loading the saved lr into pc
    152 @    BX      lr        (left disabled: the POP above already returns)
    153 
    154     .endfunc                                    @ end of the function; presumably pairs with a .func emitted by the function macro - TODO confirm
    155 
    156 
    157 
    158