@
@ Copyright (C) 2009 The Android Open Source Project
@
@ Licensed under the Apache License, Version 2.0 (the "License");
@ you may not use this file except in compliance with the License.
@ You may obtain a copy of the License at
@
@      http://www.apache.org/licenses/LICENSE-2.0
@
@ Unless required by applicable law or agreed to in writing, software
@ distributed under the License is distributed on an "AS IS" BASIS,
@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ See the License for the specific language governing permissions and
@ limitations under the License.
@

     17 #include "asm_common.S"
     18 
     19     REQUIRE8
     20     PRESERVE8
     21 
     22     .arm
     23     .fpu neon
     24     .text
     25 
     26 /* Input / output registers */
     27 #define image   r0
     28 #define data    r1
     29 #define width   r2
     30 #define luma    r3
     31 #define cb      r4
     32 #define cr      r5
     33 #define cwidth  r6
     34 
     35 /* -- NEON registers -- */
     36 
     37 #define qRow0     Q0
     38 #define qRow1     Q1
     39 #define qRow2     Q2
     40 #define qRow3     Q3
     41 #define qRow4     Q4
     42 #define qRow5     Q5
     43 #define qRow6     Q6
     44 #define qRow7     Q7
     45 #define qRow8     Q8
     46 #define qRow9     Q9
     47 #define qRow10    Q10
     48 #define qRow11    Q11
     49 #define qRow12    Q12
     50 #define qRow13    Q13
     51 #define qRow14    Q14
     52 #define qRow15    Q15
     53 
     54 #define dRow0     D0
     55 #define dRow1     D1
     56 #define dRow2     D2
     57 #define dRow3     D3
     58 #define dRow4     D4
     59 #define dRow5     D5
     60 #define dRow6     D6
     61 #define dRow7     D7
     62 #define dRow8     D8
     63 #define dRow9     D9
     64 #define dRow10    D10
     65 #define dRow11    D11
     66 #define dRow12    D12
     67 #define dRow13    D13
     68 #define dRow14    D14
     69 #define dRow15    D15
     70 
     71 /*------------------------------------------------------------------------------
     72 
     73     Function: h264bsdWriteMacroblock
     74 
     75         Functional description:
     76             Write one macroblock into the image. Both luma and chroma
     77             components will be written at the same time.
     78 
     79         Inputs:
     80             data    pointer to macroblock data to be written, 256 values for
     81                     luma followed by 64 values for both chroma components
     82 
     83         Outputs:
     84             image   pointer to the image where the macroblock will be written
     85 
     86         Returns:
     87             none
     88 
     89 ------------------------------------------------------------------------------*/
     90 
     91 function h264bsdWriteMacroblock, export=1
     92     PUSH    {r4-r6,lr}
     93     VPUSH   {q4-q7}
     94 
     95     LDR     width, [image, #4]
     96     LDR     luma, [image, #0xC]
     97     LDR     cb, [image, #0x10]
     98     LDR     cr, [image, #0x14]
     99 
    100 
    101 @   Write luma
    102     VLD1.8  {qRow0, qRow1}, [data]!
    103     LSL     width, width, #4
    104     VLD1.8  {qRow2, qRow3}, [data]!
    105     LSR     cwidth, width, #1
    106     VST1.8  {qRow0}, [luma,:128], width
    107     VLD1.8  {qRow4, qRow5}, [data]!
    108     VST1.8  {qRow1}, [luma,:128], width
    109     VLD1.8  {qRow6, qRow7}, [data]!
    110     VST1.8  {qRow2}, [luma,:128], width
    111     VLD1.8  {qRow8, qRow9}, [data]!
    112     VST1.8  {qRow3}, [luma,:128], width
    113     VLD1.8  {qRow10, qRow11}, [data]!
    114     VST1.8  {qRow4}, [luma,:128], width
    115     VLD1.8  {qRow12, qRow13}, [data]!
    116     VST1.8  {qRow5}, [luma,:128], width
    117     VLD1.8  {qRow14, qRow15}, [data]!
    118     VST1.8  {qRow6}, [luma,:128], width
    119 
    120     VLD1.8  {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3
    121     VST1.8  {qRow7}, [luma,:128], width
    122     VLD1.8  {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7
    123     VST1.8  {qRow8}, [luma,:128], width
    124     VLD1.8  {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3
    125     VST1.8  {qRow9}, [luma,:128], width
    126     VLD1.8  {qRow6, qRow7}, [data]! ;//cr rows 4,5,6,7
    127     VST1.8  {qRow10}, [luma,:128], width
    128     VST1.8  {dRow0}, [cb,:64], cwidth
    129     VST1.8  {dRow8}, [cr,:64], cwidth
    130     VST1.8  {qRow11}, [luma,:128], width
    131     VST1.8  {dRow1}, [cb,:64], cwidth
    132     VST1.8  {dRow9}, [cr,:64], cwidth
    133     VST1.8  {qRow12}, [luma,:128], width
    134     VST1.8  {dRow2}, [cb,:64], cwidth
    135     VST1.8  {dRow10}, [cr,:64], cwidth
    136     VST1.8  {qRow13}, [luma,:128], width
    137     VST1.8  {dRow3}, [cb,:64], cwidth
    138     VST1.8  {dRow11}, [cr,:64], cwidth
    139     VST1.8  {qRow14}, [luma,:128], width
    140     VST1.8  {dRow4}, [cb,:64], cwidth
    141     VST1.8  {dRow12}, [cr,:64], cwidth
    142     VST1.8  {qRow15}, [luma]
    143     VST1.8  {dRow5}, [cb,:64], cwidth
    144     VST1.8  {dRow13}, [cr,:64], cwidth
    145     VST1.8  {dRow6}, [cb,:64], cwidth
    146     VST1.8  {dRow14}, [cr,:64], cwidth
    147     VST1.8  {dRow7}, [cb,:64]
    148     VST1.8  {dRow15}, [cr,:64]
    149 
    150     VPOP    {q4-q7}
    151     POP     {r4-r6,pc}
    152 @    BX      lr
    153 
    154 
    155 
    156 
    157