Home | History | Annotate | Download | only in armv7
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2018 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 
     21 
     22 .text                                   @ routine is placed in the code section
     23 .p2align 2                              @ align to 2^2 = 4 bytes
     24 
        @ ----------------------------------------------------------------------
        @ ixheaacd_esbr_cos_sin_mod_loop2
        @
        @ In-place pass over two regions of 32-bit words in one buffer: a "low"
        @ region starting at R0 and a "high" region starting 256 bytes (64 words)
        @ after it. Each region is walked from its front (ascending pointer) and
        @ from word index 2 * M - 1 (descending pointer) at the same time.
        @
        @ Inputs (first three argument registers):
        @   R0 = subband, base of the buffer (name taken from the inline comments)
        @   R1 = pointer to pairs of 32-bit coefficients, consumed 8 bytes per
        @        pair (presumably cos/sin twiddle factors - confirm with caller)
        @   R2 = M
        @
        @ Register roles after setup:
        @   R0  ascending pointer, low region      R3  descending pointer, low region
        @   R10 ascending pointer, high region     R11 descending pointer, high region
        @   R4  loop counter = (M >> 1) - 1        R8  -4, step for descending stores
        @   D0 = {c0, c0}, D1 = {c1, c1}           current coefficient pair
        @
        @ Arithmetic: VMULL.S32 forms signed 32x32 -> 64-bit products, sums use
        @ the wrapping VADD.I64, differences use the saturating VQSUB.S64, and
        @ VSHRN #32 keeps the upper 32 bits of each 64-bit lane.
        @ Lane 0 of the D registers carries low-region data and lane 1 carries
        @ high-region data. Only D12[0], D14[0], D12[1] and D16[1] are stored.
        @
        @ Saves and restores r4-r12, lr and d8-d15, and returns by popping pc.
        @
        @ NOTE(review): LOOP1 is tested at the bottom, so its body runs once even
        @ when (M >> 1) - 1 <= 0 - confirm callers always pass a large enough M.
        @ ----------------------------------------------------------------------
     25     .global ixheaacd_esbr_cos_sin_mod_loop2
     26 ixheaacd_esbr_cos_sin_mod_loop2:
     27 
     28     STMFD           sp!, {r4-r12, r14}  @ save r4-r12 and lr
     29     VPUSH           {D8-D15}            @ save d8-d15 (q4-q7 are used below)
     30     @generating load addresses
     31     ADD             R3, R0, R2, LSL #3  @ psubband1 = &subband[2 * M - 1] (completed by the SUB below)
     32     SUB             R3, R3, #4          @ step back one word from subband + 8 * M
     33     ADD             R10, R0, #256       @ R10 = subband + 256 bytes, front of the high region (&subband[64])
     34     ADD             R11, R10, R2, LSL #3    @ R11 = high region + 8 * M bytes
     35     SUB             R11, R11, #4        @ R11 = &subband[64 + 2 * M - 1], back of the high region
     36     MOV             R8, #-4             @ R8 = -4, post-index step for the descending NEON stores
     37     LDR             R6, [R0]            @ R6 = subband[0]
     38     MOV             R4, R2, ASR #1      @M_2 = ixheaacd_shr32(M, 1);
     39     SUB             R4, R4, #1          @ loop counter R4 = M_2 - 1
     40 
     41     ASR             R6, R6, #1          @*psubband = *psubband >> 1;
     42     VLD1.32         {D2[0]}, [R3]       @ D2[0] = subband[2 * M - 1], read before it is overwritten below
     43 
     44     STR             R6, [R0], #4        @psubband++;
     45     LDR             R7, [R0]            @ R7 = subband[1]
     46     ASR             R7, R7, #1          @ R7 = R7 >> 1 (arithmetic shift)
     47     RSB             R6, R7, #0          @ R6 = 0 - R7 (plain, non-saturating negate)
     48     STR             R6, [R3], #-4       @ subband[2 * M - 1] = R6, then R3 steps down one word
     49     VLD1.32         {D3[0]}, [R3]       @  im = *psubband1;
     50 
     51     VLD2.32         {D0[0], D1[0]}, [R1]!   @ load one coefficient pair: D0[0] = c0, D1[0] = c1, R1 += 8
     52     VDUP.32         D0, D0[0]           @ D0 = {c0, c0}
     53     VDUP.32         D1, D1[0]           @ D1 = {c1, c1}
     54 
     55     VLD1.32         {D2[1]}, [R11]      @re = *psubband12;
     56 
     57     LDR             R6, [R10]           @ R6 = subband[64]
     58     ASR             R7, R6, #1          @ R7 = R6 >> 1 (arithmetic shift)
     59     MOV             R9, #0              @ zero operand for the negate below
     60     QSUB            R7, R9, R7          @ R7 = saturating (0 - R7)
     61 
     62     STR             R7, [R11], #-4      @ subband[64 + 2 * M - 1] = R7, then R11 steps down one word
     63 
     64     LDR             R6, [R10, #4]       @ R6 = subband[65]
     65     ASR             R6, R6, #1          @ R6 = R6 >> 1 (arithmetic shift)
     66     STR             R6, [R10], #4       @ subband[64] = R6, then R10 advances to &subband[65]
     67 
     68     VLD1.32         {D3[1]}, [R11]      @ D3[1] = subband[64 + 2 * M - 2]
     69 
     70     VMULL.S32       q2, d0, d2          @ q2 = c0 * D2 (qsub 2nd)
     71     VMULL.S32       q3, d0, d3          @ q3 = c0 * D3 (add 2nd)
     72     VMULL.S32       q4, d1, d2          @ q4 = c1 * D2 (add 1st)
     73     VMULL.S32       q5, d1, d3          @ q5 = c1 * D3 (qsub 1st)
     74 
     75     vadd.I64        q6, q4, q3          @ q6 = c1 * D2 + c0 * D3 (wrapping add)
     76     VQSUB.S64       Q7, Q5, Q2          @ Q7 = c1 * D3 - c0 * D2 (saturating)
     77     VQSUB.S64       Q8, Q2, Q5          @ Q8 = c0 * D2 - c1 * D3 (saturating)
     78 
     79     VSHRN.I64       D12, Q6, #32        @ D12 = upper 32 bits of each Q6 lane
     80     VSHRN.I64       D14, Q7, #32        @ D14 = upper 32 bits of each Q7 lane
     81     VSHRN.I64       D16, Q8, #32        @ D16 = upper 32 bits of each Q8 lane
     82 
     83     VST1.32         {D12[0]}, [R3], R8  @ subband[2 * M - 2] = D12[0], then R3 -= 4
     84 
     85     VST1.32         {D14[0]}, [R0]!     @ subband[1] = D14[0], then R0 += 4
     86 
     87     VQNEG.S32       D12, D12            @ D12 = saturating -D12
     88 
     89 
     90     VST1.32         {D12[1]}, [R10]!    @ subband[65] = negated D12[1], then R10 += 4
     91 
     92     VST1.32         {D16[1]}, [R11], R8 @ subband[64 + 2 * M - 2] = D16[1], then R11 -= 4
     93 
     94 LOOP1:                                  @ each pass moves every one of the four pointers by two words
     95     VLD1.32         {D2}, [R0]          @ D2 = two consecutive low-region words at R0
     96     VLD1.32         {D3}, [R10]         @ D3 = two consecutive high-region words at R10
     97     LDR             R5, [R3]            @ RE2: word at R3, saved for the second part
     98     LDR             R6, [R11]           @ RE3: word at R11, saved for the second part
     99     VTRN.32         D2, D3              @ D2 = {low[0], high[0]}, D3 = {low[1], high[1]}
    100 
    101     VMULL.S32       q2, d0, d2          @ q2 = c0 * D2 (qsub 2nd)
    102     VMULL.S32       q3, d0, d3          @ q3 = c0 * D3 (add 2nd)
    103     VMULL.S32       q4, d1, d2          @ q4 = c1 * D2 (add 1st)
    104     VMULL.S32       q5, d1, d3          @ q5 = c1 * D3 (qsub 1st)
    105 
    106     vadd.I64        q6, q4, q3          @ q6 = c1 * D2 + c0 * D3 (wrapping add)
    107     VQSUB.S64       Q7, Q2, Q5          @ Q7 = c0 * D2 - c1 * D3 (saturating)
    108     VQSUB.S64       Q8, Q5, Q2          @ Q8 = c1 * D3 - c0 * D2 (saturating)
    109 
    110     VSHRN.I64       D12, Q6, #32        @ D12 = upper 32 bits of each Q6 lane
    111     VSHRN.I64       D14, Q7, #32        @ D14 = upper 32 bits of each Q7 lane
    112     VSHRN.I64       D16, Q8, #32        @ D16 = upper 32 bits of each Q8 lane
    113 
    114     VST1.32         {D12[0]}, [R0]!     @ store D12[0] at R0, then R0 += 4
    115     VST1.32         {D14[0]}, [R3], R8  @ store D14[0] at R3 (word already saved in R5), then R3 -= 4
    116     VQNEG.S32       D12, D12            @ D12 = saturating -D12
    117 
    118     VST1.32         {D12[1]}, [R11], R8 @ store negated D12[1] at R11 (word already saved in R6), then R11 -= 4
    119     VST1.32         {D16[1]}, [R10]!    @ store D16[1] at R10, then R10 += 4
    120 
    121     @ second part: next coefficient pair, applied to the saved R5/R6 words
    122     VLD2.32         {D0[0], D1[0]}, [R1]!   @ load next coefficient pair: D0[0] = c0, D1[0] = c1, R1 += 8
    123     VDUP.32         D0, D0[0]           @ D0 = {c0, c0}
    124     VDUP.32         D1, D1[0]           @ D1 = {c1, c1}
    125 
    126     VMOV            D3, R5, R6          @ D3 = {R5, R6}, the words read before the stores above
    127     VLD1.32         {D2[0]}, [R3]       @ D2[0] = word now at R3 (one below the word saved in R5)
    128     VLD1.32         {D2[1]}, [R11]      @ D2[1] = word now at R11 (one below the word saved in R6)
    129 
    130     VMULL.S32       q2, d0, d2          @ q2 = c0 * D2
    131     VMULL.S32       q3, d0, d3          @ q3 = c0 * D3
    132     VMULL.S32       q4, d1, d2          @ q4 = c1 * D2
    133     VMULL.S32       q5, d1, d3          @ q5 = c1 * D3
    134 
    135     vadd.I64        q6, q2, q5          @ q6 = c0 * D2 + c1 * D3 (wrapping add)
    136     VQSUB.S64       Q7, Q4, Q3          @ Q7 = c1 * D2 - c0 * D3 (saturating)
    137     VQSUB.S64       Q8, Q3, Q4          @ Q8 = c0 * D3 - c1 * D2 (saturating)
    138 
    139     VSHRN.I64       D12, Q6, #32        @ D12 = upper 32 bits of each Q6 lane
    140     VSHRN.I64       D14, Q7, #32        @ D14 = upper 32 bits of each Q7 lane
    141     VSHRN.I64       D16, Q8, #32        @ D16 = upper 32 bits of each Q8 lane
    142 
    143     VST1.32         {D12[0]}, [R3], R8  @ store D12[0] at R3, then R3 -= 4
    144     VST1.32         {D14[0]}, [R0]!     @ store D14[0] at R0, then R0 += 4
    145 
    146     VQNEG.S32       D12, D12            @ D12 = saturating -D12
    147 
    148     subs            r4, r4, #1          @ decrement the loop counter and set flags for the BGT below
    149     VST1.32         {D12[1]}, [R10]!    @ store negated D12[1] at R10, then R10 += 4
    150     VST1.32         {D16[1]}, [R11], R8 @ store D16[1] at R11, then R11 -= 4
    151 
    152     BGT             LOOP1               @ repeat while the counter is still > 0
    153     VPOP            {D8-D15}            @ restore d8-d15
    154     LDMFD           sp!, {r4-r12, r15}  @ restore r4-r12 and return by loading the saved lr into pc
    155 
    156 
    157 
    158 
    159 
    160 
    161 
    162 
    163 
    164 
    165 
    166 
    167 
    168 
    169 
    170 
    171 
    172 
    173 
    174 
    175 
    176 
    177 
    178 
    179 
    180 
    181