Home | History | Annotate | Download | only in arm_neon_asm_gcc
      1 @
      2 @ Copyright (C) 2009 The Android Open Source Project
      3 @
      4 @ Licensed under the Apache License, Version 2.0 (the "License");
      5 @ you may not use this file except in compliance with the License.
      6 @ You may obtain a copy of the License at
      7 @
      8 @      http://www.apache.org/licenses/LICENSE-2.0
      9 @
     10 @ Unless required by applicable law or agreed to in writing, software
     11 @ distributed under the License is distributed on an "AS IS" BASIS,
     12 @ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 @ See the License for the specific language governing permissions and
     14 @ limitations under the License.
     15 @
     16 
     17 #include "asm_common.S"
     18 
     19     preserve8                   /* not defined in this file; presumably a macro from asm_common.S - TODO confirm what it emits */
     20 
     21     .fpu neon                   /* allow the assembler to accept the NEON instructions used below */
     22     .text                       /* everything that follows is placed in the text section */
     23 
     24 /* Input / output registers */
     25 
     26 #define ref     r0              /* argument 1: source pointer, post-incremented while copying */
     27 #define fill    r1              /* argument 2: destination pointer, post-incremented on every store */
     28 #define left    r2              /* argument 3: count for the leading fill loop */
     29 #define tmp2    r2              /* scratch; shares r2 with left, so it is only usable once left is no longer needed */
     30 #define center  r3              /* argument 4: count of bytes copied from ref to fill */
     31 #define right   r4              /* count for the trailing fill loop; h264bsdFillRow7 loads it from the stack into r4 */
     32 #define tmp1    r5              /* scratch; holds the byte replicated by the leading fill loop */
     33 
     34 /* -- NEON registers -- */
     35 /* The .U8 suffix attaches an 8-bit unsigned element type to the register operand. */
     36 #define qTmp0     Q0.U8         /* Q0 is the same storage as D0:D1 */
     37 #define qTmp1     Q1.U8         /* Q1 is the same storage as D2:D3 */
     38 #define dTmp0     D0.U8
     39 #define dTmp1     D1.U8
     40 #define dTmp2     D2.U8
     41 #define dTmp3     D3.U8         /* not referenced by name in h264bsdFillRow7 (D3 is only reached through qTmp1) */
     42 
     43 /*
     44 void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center,
     45                      i32 right);
     46 */
     47 
     48 function h264bsdFillRow7, export=1
     49 
     50         PUSH     {r4-r6,lr}
     51         CMP      left, #0
     52         LDR      right, [sp,#0x10]
     53         BEQ      switch_center
     54         LDRB     tmp1, [ref,#0]
     55 
     56 loop_left:
     57         SUBS     left, left, #1
     58         STRB     tmp1, [fill], #1
     59         BNE      loop_left
     60 
     61 switch_center:
     62         ASR      tmp2,center,#2
     63         CMP      tmp2,#9
     64         ADDCC    pc,pc,tmp2,LSL #2
     65         B        loop_center
     66         B        loop_center
     67         B        case_1
     68         B        case_2
     69         B        case_3
     70         B        case_4
     71         B        case_5
     72         B        case_6
     73         B        case_7
     74         B        case_8
     75 
     76 case_8:
     77         VLD1    {qTmp0, qTmp1}, [ref]!
     78         SUB     center, center, #32
     79         VST1    {qTmp0}, [fill]!
     80         VST1    {qTmp1}, [fill]!
     81         B       loop_center
     82 case_7:
     83         VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
     84         SUB     center, center, #28
     85         LDR     tmp2, [ref], #4
     86         VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
     87         STR     tmp2, [fill],#4
     88         B       loop_center
     89 case_6:
     90         VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
     91         SUB     center, center, #24
     92         VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
     93         B       loop_center
     94 case_5:
     95         VLD1    {qTmp0}, [ref]!
     96         SUB     center, center, #20
     97         LDR     tmp2, [ref], #4
     98         VST1    {qTmp0}, [fill]!
     99         STR     tmp2, [fill],#4
    100         B       loop_center
    101 case_4:
    102         VLD1    {qTmp0}, [ref]!
    103         SUB     center, center, #16
    104         VST1    {qTmp0}, [fill]!
    105         B       loop_center
    106 case_3:
    107         VLD1    {dTmp0}, [ref]!
    108         SUB     center, center, #12
    109         LDR     tmp2, [ref], #4
    110         VST1    dTmp0, [fill]!
    111         STR     tmp2, [fill],#4
    112         B       loop_center
    113 case_2:
    114         LDR      tmp2, [ref],#4
    115         SUB      center, center, #4
    116         STR      tmp2, [fill], #4
    117 case_1:
    118         LDR      tmp2, [ref],#4
    119         SUB      center, center, #4
    120         STR      tmp2, [fill], #4
    121 
    122 loop_center:
    123         CMP      center, #0
    124         BEQ      jump
    125         LDRB     tmp2, [ref], #1
    126         SUB      center, center, #1
    127         STRB     tmp2, [fill], #1
    128         BNE      loop_center
    129 jump:
    130         CMP      right,#0
    131         POPEQ    {r4-r6,pc}
    132         LDRB     tmp2, [ref,#-1]
    133 
    134 loop_right:
    135         STRB     tmp2, [fill], #1
    136         SUBS     right, right, #1
    137         BNE      loop_right
    138 
    139         POP      {r4-r6,pc}
    140 
    141 endfunction
    142 
    143 
    144