@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
     16 
     17 
     18 @*void Convolve (
     19 @*    Word16 x[],        /* (i)     : input vector                           */
     20 @*    Word16 h[],        /* (i)     : impulse response                       */
     21 @*    Word16 y[],        /* (o)     : output vector                          */
     22 @*    Word16 L           /* (i)     : vector size                            */
     23 @*)
     24 @  r0 --- x[]
     25 @  r1 --- h[]
     26 @  r2 --- y[]
     27 @  r3 --- L
     28 
     29 	.section  .text
     30         .global   Convolve_asm
     31 
     32 Convolve_asm:
     33 
     34         STMFD          r13!, {r4 - r12, r14}
     35         MOV            r3,  #0                           @ n
     36 	MOV            r11, #0x8000
     37 
     38 LOOP:
     39         ADD            r4, r1, r3, LSL #1                @ tmpH address
     40         ADD            r5, r3, #1                        @ i = n + 1
     41         MOV            r6, r0                            @ tmpX = x
     42         LDRSH          r9,  [r6], #2                     @ *tmpX++
     43         LDRSH          r10, [r4], #-2                    @ *tmpH--
     44         SUB            r5, r5, #1
     45         MUL            r8,  r9, r10
     46 
     47 LOOP1:
     48         CMP            r5, #0
     49         BLE            L1
     50 	LDRSH          r9,  [r6], #2                     @ *tmpX++
     51 	LDRSH          r10, [r4], #-2                    @ *tmpH--
     52 	LDRSH          r12, [r6], #2                     @ *tmpX++
     53 	LDRSH          r14, [r4], #-2                    @ *tmpH--
     54 	MLA            r8, r9, r10, r8
     55 	MLA            r8, r12, r14, r8
     56 	LDRSH          r9,  [r6], #2                     @ *tmpX++
     57 	LDRSH          r10, [r4], #-2                    @ *tmpH--
     58 	LDRSH          r12, [r6], #2                     @ *tmpX++
     59 	LDRSH          r14, [r4], #-2                    @ *tmpH--
     60 	MLA            r8, r9, r10, r8
     61         SUBS           r5, r5, #4
     62 	MLA            r8, r12, r14, r8
     63 
     64         B              LOOP1
     65 
     66 L1:
     67 
     68         ADD            r5, r11, r8, LSL #1
     69         MOV            r5, r5, LSR #16                   @extract_h(s)
     70         ADD            r3, r3, #1
     71         STRH           r5, [r2], #2                      @y[n]
     72 
     73 
     74         ADD            r4, r1, r3, LSL #1                @tmpH address
     75         ADD            r5, r3, #1
     76         MOV            r6, r0
     77         LDRSH          r9,  [r6], #2                     @ *tmpX++
     78         LDRSH          r10, [r4], #-2
     79         LDRSH          r12, [r6], #2
     80         LDRSH          r14, [r4], #-2
     81 
     82         MUL            r8, r9, r10
     83         SUB            r5, r5, #2
     84         MLA            r8, r12, r14, r8
     85 
     86 LOOP2:
     87         CMP            r5, #0
     88         BLE            L2
     89 	LDRSH          r9,  [r6], #2                     @ *tmpX++
     90 	LDRSH          r10, [r4], #-2                    @ *tmpH--
     91 	LDRSH          r12, [r6], #2                     @ *tmpX++
     92 	LDRSH          r14, [r4], #-2                    @ *tmpH--
     93 	MLA            r8, r9, r10, r8
     94 	MLA            r8, r12, r14, r8
     95 	LDRSH          r9,  [r6], #2                     @ *tmpX++
     96 	LDRSH          r10, [r4], #-2                    @ *tmpH--
     97 	LDRSH          r12, [r6], #2                     @ *tmpX++
     98 	LDRSH          r14, [r4], #-2                    @ *tmpH--
     99 	MLA            r8, r9, r10, r8
    100         SUBS           r5, r5, #4
    101 	MLA            r8, r12, r14, r8
    102         B              LOOP2
    103 
    104 L2:
    105         ADD            r8, r11, r8, LSL #1
    106         MOV            r8, r8, LSR #16                   @extract_h(s)
    107         ADD            r3, r3, #1
    108         STRH           r8, [r2], #2                      @y[n]
    109 
    110         ADD            r4, r1, r3, LSL #1
    111         ADD            r5, r3, #1
    112         MOV            r6, r0
    113         LDRSH          r9,  [r6], #2
    114         LDRSH          r10, [r4], #-2
    115         LDRSH          r12, [r6], #2
    116         LDRSH          r14, [r4], #-2
    117         MUL            r8, r9, r10
    118         LDRSH          r9,  [r6], #2
    119         LDRSH          r10, [r4], #-2
    120         MLA            r8, r12, r14, r8
    121         SUB            r5, r5, #3
    122         MLA            r8, r9, r10, r8
    123 
    124 LOOP3:
    125         CMP            r5, #0
    126         BLE            L3
    127 	LDRSH          r9,  [r6], #2                     @ *tmpX++
    128 	LDRSH          r10, [r4], #-2                    @ *tmpH--
    129 	LDRSH          r12, [r6], #2                     @ *tmpX++
    130 	LDRSH          r14, [r4], #-2                    @ *tmpH--
    131 	MLA            r8, r9, r10, r8
    132 	MLA            r8, r12, r14, r8
    133 	LDRSH          r9,  [r6], #2                     @ *tmpX++
    134 	LDRSH          r10, [r4], #-2                    @ *tmpH--
    135 	LDRSH          r12, [r6], #2                     @ *tmpX++
    136 	LDRSH          r14, [r4], #-2                    @ *tmpH--
    137 	MLA            r8, r9, r10, r8
    138         SUBS           r5, r5, #4
    139 	MLA            r8, r12, r14, r8
    140         B              LOOP3
    141 
    142 L3:
    143         ADD            r8, r11, r8, LSL #1
    144         MOV            r8, r8, LSR #16                   @extract_h(s)
    145         ADD            r3, r3, #1
    146         STRH           r8, [r2], #2                      @y[n]
    147 
    148         ADD            r5, r3, #1                        @ i = n + 1
    149         ADD            r4, r1, r3, LSL #1                @ tmpH address
    150         MOV            r6, r0
    151         MOV            r8, #0
    152 
    153 LOOP4:
    154         CMP            r5, #0
    155         BLE            L4
    156 	LDRSH          r9,  [r6], #2                     @ *tmpX++
    157 	LDRSH          r10, [r4], #-2                    @ *tmpH--
    158 	LDRSH          r12, [r6], #2                     @ *tmpX++
    159 	LDRSH          r14, [r4], #-2                    @ *tmpH--
    160 	MLA            r8, r9, r10, r8
    161 	MLA            r8, r12, r14, r8
    162 	LDRSH          r9,  [r6], #2                     @ *tmpX++
    163 	LDRSH          r10, [r4], #-2                    @ *tmpH--
    164 	LDRSH          r12, [r6], #2                     @ *tmpX++
    165 	LDRSH          r14, [r4], #-2                    @ *tmpH--
    166 	MLA            r8, r9, r10, r8
    167         SUBS           r5, r5, #4
    168 	MLA            r8, r12, r14, r8
    169         B              LOOP4
    170 L4:
    171         ADD            r5, r11, r8, LSL #1
    172         MOV            r5, r5, LSR #16                   @extract_h(s)
    173         ADD            r3, r3, #1
    174         STRH           r5, [r2], #2                      @y[n]
    175 
    176         CMP            r3, #64
    177         BLT            LOOP
    178 
    179 Convolve_asm_end:
    180 
    181         LDMFD      r13!, {r4 - r12, r15}
    182 
    183         @ENDFUNC
    184         .END
    185 
    186 
    187