Home | History | Annotate | Download | only in arm_neon_asm
      1 ;
      2 ; Copyright (C) 2009 The Android Open Source Project
      3 ;
      4 ; Licensed under the Apache License, Version 2.0 (the "License");
      5 ; you may not use this file except in compliance with the License.
      6 ; You may obtain a copy of the License at
      7 ;
      8 ;      http://www.apache.org/licenses/LICENSE-2.0
      9 ;
     10 ; Unless required by applicable law or agreed to in writing, software
     11 ; distributed under the License is distributed on an "AS IS" BASIS,
     12 ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ; See the License for the specific language governing permissions and
     14 ; limitations under the License.
     15 ;
     16 
     17     REQUIRE8                    ; armasm: this code requires an 8-byte aligned stack
     18     PRESERVE8                   ; armasm: this code keeps the stack 8-byte aligned
     19 
     20     AREA    |.text|, CODE       ; place what follows in the code section named .text
     21 
     22     EXPORT h264bsdFillRow7      ; make the entry point visible to the linker
     23 
     24 ; Input / output registers (core-register aliases used by h264bsdFillRow7)
     25 
     26 ref     RN 0                    ; r0: source pointer, advanced by post-increment loads
     27 fill    RN 1                    ; r1: destination pointer, advanced by post-increment stores
     28 left    RN 2                    ; r2: number of bytes to replicate before the copied part
     29 tmp2    RN 2                    ; r2 again: scratch; only written after the 'left' count has run down to zero
     30 center  RN 3                    ; r3: number of bytes to copy from ref to fill
     31 right   RN 4                    ; r4: number of bytes to replicate after the copied part (loaded from the stack by the function)
     32 tmp1    RN 5                    ; r5: holds the byte that is replicated on the left
     33 
     34 ; -- NEON registers -- (typed as unsigned bytes, so VLD1/VST1 pick up the element size from the alias)
     35 
     36 qTmp0   QN  Q0.U8               ; q0 (same storage as d0/d1)
     37 qTmp1   QN  Q1.U8               ; q1 (same storage as d2/d3)
     38 dTmp0   DN  D0.U8               ; d0..d2 carry the 8-byte and 24-byte copies
     39 dTmp1   DN  D1.U8
     40 dTmp2   DN  D2.U8
     41 dTmp3   DN  D3.U8               ; declared but not referenced by h264bsdFillRow7
     42 
     43 
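     44 ;/*------------------------------------------------------------------------------
     45 ;
     46 ;    Function: h264bsdFillRow7
     47 ;
     48 ;        Functional description:
     49 ;          Writes one padded row to fill: 'left' copies of ref[0], then 'center' bytes copied from ref, then 'right' copies of the byte just before the final ref position.
     50 ;        Inputs:
     51 ;          r0 = ref (source), r1 = fill (destination), r2 = left, r3 = center, word at [sp] on entry = right
     52 ;        Outputs:
     53 ;          left + center + right bytes stored upwards from fill
     54 ;        Returns:
     55 ;          no explicit return value; r0 is only used as the advancing source pointer
     56 ;------------------------------------------------------------------------------*/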
     57 
     58 h264bsdFillRow7                                 ; fill row: 'left' x ref[0], then 'center' bytes copied from ref, then 'right' x last byte
     59         PUSH     {r4-r6,lr}                     ; save r4 (right), r5 (tmp1), r6 and lr: 4 words = 16 bytes. r6 is never used below - presumably pushed only to keep the stack 8-byte aligned (confirm)
     60         CMP      left, #0                       ; any left padding requested?
     61         LDR      right, [sp,#0x10]              ; right = word just above the 16 bytes pushed (presumably the 5th argument under AAPCS); LDR leaves the CMP flags intact
     62         BEQ      switch_center                  ; left == 0: skip the padding loop and the ref[0] read
     63         LDRB     tmp1, [ref,#0]                 ; tmp1 = ref[0]; ref itself is not advanced
     64 
     65 loop_left                                       ; store tmp1 'left' times
     66         SUBS     left, left, #1                 ; count down; these flags drive the BNE (STRB does not touch them)
     67         STRB     tmp1, [fill], #1               ; *fill++ = tmp1
     68         BNE      loop_left
     69 
     70 switch_center                                   ; bulk-copy the whole 32-bit words of 'center' via a computed branch
     71         ASR      tmp2,center,#2                 ; tmp2 = center >> 2 (r2 is free: the 'left' count is finished)
     72         CMP      tmp2,#9                        ; the table below only has entries for 0..8 words
     73         ADDCC    pc,pc,tmp2,LSL #2              ; tmp2 < 9 (unsigned): pc = (this instruction + 8) + 4*tmp2. NOTE(review): relies on ARM-state PC read and 4-byte instructions - confirm this file is never built as Thumb
     74         B        loop_center                    ; reached only when the ADDCC is skipped (tmp2 >= 9 unsigned, which includes negative): copy everything bytewise
     75         B        loop_center                    ; table[0]: fewer than 4 bytes, bytewise only
     76         B        case_1                         ; table[1]
     77         B        case_2                         ; table[2]
     78         B        case_3                         ; table[3]
     79         B        case_4                         ; table[4]
     80         B        case_5                         ; table[5]
     81         B        case_6                         ; table[6]
     82         B        case_7                         ; table[7]
     83         B        case_8                         ; table[8]
     84 ;case_8                                         ; [disabled, not assembled] word-at-a-time chain case_8..case_1 that falls through, 4 bytes per step; the active case_N code follows it
     85 ;        LDR      tmp2, [ref], #4
     86 ;        SUB      center, center, #4
     87 ;        STR      tmp2, [fill], #4
     88 ;case_7
     89 ;        LDR      tmp2, [ref], #4
     90 ;        SUB      center, center, #4
     91 ;        STR      tmp2, [fill], #4
     92 ;case_6
     93 ;        LDR      tmp2, [ref], #4
     94 ;        SUB      center, center, #4
     95 ;        STR      tmp2, [fill],#4
     96 ;case_5
     97 ;        LDR      tmp2, [ref], #4
     98 ;        SUB      center, center, #4
     99 ;        STR      tmp2, [fill],#4
    100 ;case_4
    101 ;        LDR      tmp2, [ref],#4
    102 ;        SUB      center, center, #4
    103 ;        STR      tmp2, [fill], #4
    104 ;case_3
    105 ;        LDR      tmp2, [ref],#4
    106 ;        SUB      center, center, #4
    107 ;        STR      tmp2, [fill], #4
    108 ;case_2
    109 ;        LDR      tmp2, [ref],#4
    110 ;        SUB      center, center, #4
    111 ;        STR      tmp2, [fill], #4
    112 ;case_1
    113 ;        LDR      tmp2, [ref],#4
    114 ;        SUB      center, center, #4
    115 ;        STR      tmp2, [fill], #4
    116 
    117 case_8                                          ; center is 32..35: copy 32 bytes with NEON
    118         VLD1    {qTmp0, qTmp1}, [ref]!          ; q0,q1 = 32 bytes from ref; writeback advances ref by 32
    119         SUB     center, center, #32             ; 0..3 bytes remain for loop_center
    120         VST1    qTmp0, [fill]!                  ; store first 16 bytes, fill += 16
    121         VST1    qTmp1, [fill]!                  ; store second 16 bytes, fill += 16
    122         B       loop_center
    123 case_7                                          ; center is 28..31: 24 bytes via d0-d2 plus one word
    124         VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!     ; 24 bytes, ref += 24
    125         SUB     center, center, #28
    126         LDR     tmp2, [ref], #4                 ; 4 more bytes through r2, ref += 4
    127         VST1    {dTmp0,dTmp1,dTmp2}, [fill]!    ; fill += 24
    128         STR     tmp2, [fill],#4                 ; fill += 4
    129         B       loop_center
    130 case_6                                          ; center is 24..27: 24 bytes via d0-d2
    131         VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
    132         SUB     center, center, #24
    133         VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
    134         B       loop_center
    135 case_5                                          ; center is 20..23: 16 bytes via q0 plus one word
    136         VLD1    qTmp0, [ref]!                   ; 16 bytes, ref += 16
    137         SUB     center, center, #20
    138         LDR     tmp2, [ref], #4
    139         VST1    qTmp0, [fill]!
    140         STR     tmp2, [fill],#4
    141         B       loop_center
    142 case_4                                          ; center is 16..19: 16 bytes via q0
    143         VLD1    qTmp0, [ref]!
    144         SUB     center, center, #16
    145         VST1    qTmp0, [fill]!
    146         B       loop_center
    147 case_3                                          ; center is 12..15: 8 bytes via d0 plus one word
    148         VLD1    dTmp0, [ref]!                   ; 8 bytes, ref += 8
    149         SUB     center, center, #12
    150         LDR     tmp2, [ref], #4
    151         VST1    dTmp0, [fill]!
    152         STR     tmp2, [fill],#4
    153         B       loop_center
    154 case_2                                          ; center is 8..11: one word here, then falls through into case_1 for the second
    155         LDR      tmp2, [ref],#4                 ; NOTE(review): word LDR/STR with no alignment check on ref/fill - assumes unaligned access is permitted or the pointers are aligned; confirm
    156         SUB      center, center, #4
    157         STR      tmp2, [fill], #4
    158 case_1                                          ; center is 4..7 (or arriving from case_2): one word, then falls through into loop_center
    159         LDR      tmp2, [ref],#4
    160         SUB      center, center, #4
    161         STR      tmp2, [fill], #4
    162 
    163 loop_center                                     ; copy whatever is left of 'center' one byte at a time
    164         CMP      center, #0                     ; sets Z for the whole conditional group below
    165         LDRBNE   tmp2, [ref], #1                ; tmp2 = *ref++
    166         SUBNE    center, center, #1             ; no S suffix: the flags from the CMP stay valid through the BNE
    167         STRBNE   tmp2, [fill], #1               ; *fill++ = tmp2
    168         BNE      loop_center
    169         CMP      right,#0                       ; any right padding requested?
    170         POPEQ    {r4-r6,pc}                     ; no: restore registers and return
    171         LDRB     tmp2, [ref,#-1]                ; byte just before the current ref = last byte copied. NOTE(review): when center == 0, ref was never advanced, so this reads the byte before the original ref - confirm callers guarantee that is valid
    172 
    173 loop_right                                      ; store tmp2 'right' times
    174         STRB     tmp2, [fill], #1               ; *fill++ = tmp2
    175         SUBS     right, right, #1
    176         BNE      loop_right
    177 
    178         POP      {r4-r6,pc}                     ; restore r4-r6 and return by loading the saved lr into pc
    179         END                                     ; armasm: end of source file
    180 
    181