Home | History | Annotate | Download | only in ARMV5E
      1 @/*
      2 @ ** Copyright 2003-2010, VisualOn, Inc.
      3 @ **
      4 @ ** Licensed under the Apache License, Version 2.0 (the "License");
      5 @ ** you may not use this file except in compliance with the License.
      6 @ ** You may obtain a copy of the License at
      7 @ **
      8 @ **     http://www.apache.org/licenses/LICENSE-2.0
      9 @ **
     10 @ ** Unless required by applicable law or agreed to in writing, software
     11 @ ** distributed under the License is distributed on an "AS IS" BASIS,
     12 @ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 @ ** See the License for the specific language governing permissions and
     14 @ ** limitations under the License.
     15 @ */
     16 
     17 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
     18 @	File:		Radix4FFT_v5.s
     19 @
     20 @	Content:	Radix4FFT, ARMv5 assembly implementation
     21 @
     22 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
     23 	.section .text
     24 	.global	Radix4FFT
     25 
@-----------------------------------------------------------------------
@ Radix4FFT
@
@ Runs successive radix-4 butterfly passes, in place, over an array of
@ 32-bit word pairs (xptr[0], xptr[1]) -- presumably (re, im) samples;
@ confirm against the caller.
@
@ Arguments (names taken from the original inline comments; the C
@ prototype is not visible in this file -- TODO confirm):
@   r0 = buf      base of the data array
@   r1 = num      pass/group count; shifted right by 2 before each pass
@   r2 = bgn      butterflies per group; multiplied by 4 after each pass
@   r3 = twidTab  table of packed twiddle words: the top halfword is used
@                 as cosx and the bottom halfword as sinx
@
@ Loop structure implemented below:
@   for (num >>= 2; num != 0; num >>= 2) {            @ Radix4FFT_LOOP1
@       step = 2*bgn words (= bgn*8 bytes); xptr = buf;
@       for (i = num; i != 0; i--) {                  @ Radix4FFT_LOOP2
@           csptr = twidTab;
@           for (j = bgn; j != 0; j--) {              @ Radix4FFT_LOOP3
@               butterfly on A=xptr, B=A+step, C=A+2*step, D=A+3*step,
@               consuming three twiddle words; xptr += 2 words;
@           }
@           xptr += 3*step;
@       }
@       twidTab += 3*bgn words;  bgn <<= 2;
@   }
@
@ Register roles inside the loops:
@   r14 = xptr, r12 = csptr, r9 = step in bytes, r8 = current twiddle
@   word (also reused as 3*step scratch), r10/r11 = t0/t1 scratch
@   (they also carry i and j, which is why those are kept on the stack).
@
@ Stack frame (32 bytes below the 9 saved registers):
@   [sp, #0]  buf        [sp, #4]  num       [sp, #8]  bgn
@   [sp, #12] twidTab    [sp, #16] i         [sp, #20] j
@   [sp, #24] r2 spill   [sp, #28] r3 spill
@
@ The routine makes no calls; it returns by popping the saved lr into pc.
@
@ NOTE(review): the "cmp r10, #0 / beq Radix4FFT_LOOP1_END" pair at the
@   top of Radix4FFT_LOOP1 can never be taken: r1 is already known to be
@   non-zero on every entry to that label.
@ NOTE(review): num is shifted arithmetically and only compared with
@   zero, so a negative num would never terminate -- presumably callers
@   always pass a positive count; confirm.
@ NOTE(review): ldrd/strd are used through r14, so buf is presumably
@   required to be 8-byte aligned on the target core -- confirm.
@-----------------------------------------------------------------------
     26 Radix4FFT:
     27 	stmdb     sp!, {r4 - r11, lr}					@ save r4-r11 and the return address
     28 	sub       sp, sp, #32							@ reserve the 32-byte local frame
     29 
     30 	mov			r1, r1, asr #2							@ num >>= 2
     31 	cmp     r1, #0
     32 	beq     Radix4FFT_END							@ no pass to run
     33 
     34 Radix4FFT_LOOP1:										@ one pass per iteration
     35 	mov     r14, r0          							@ xptr = buf
     36 	mov		r10, r1 									@ i = num
     37 	mov     r9, r2, lsl #3  							@ step = bgn*8 bytes (2*bgn words)
     38 	cmp     r10, #0									@ flags survive the str's below
     39 	str		r0, [sp]								@ save buf
     40 	str		r1, [sp, #4]							@ save num
     41 	str		r2, [sp, #8]							@ save bgn
     42 	str		r3, [sp, #12]							@ save twidTab
     43 	beq     Radix4FFT_LOOP1_END						@ i == 0 (not reachable, see header note)
     44 
     45 Radix4FFT_LOOP2:										@ one group per iteration
     46 	mov     r12, r3				        				@ csptr = twidTab
     47 	mov		r11, r2										@ j = bgn
     48 	cmp     r11, #0									@ flags survive the str below
     49 	str		r10, [sp, #16]							@ save i (r10 is reused as t0)
     50 	beq     Radix4FFT_LOOP2_END						@ bgn == 0: skip the butterfly loop
     51 
     52 Radix4FFT_LOOP3:										@ one radix-4 butterfly per iteration
     53 	str			r11, [sp, #20]							@ save j (r11 is reused as t1)
     54 
     55 	ldrd		r0, [r14, #0]								@ r0 = A[0], r1 = A[1]  (A = xptr)
     56 	add			r14, r14, r9 	 							@ xptr -> B (A + step)
     57 
     58 	ldrd		r10,	[r14, #0]  					 		@ t0 = r10 = B[0], t1 = r11 = B[1]
     59 	ldr			r8, [r12], #4								@ r8 = 1st twiddle word (cosx:top, sinx:bottom); csptr += 4 bytes
     60 
     61 	smulwt	r4, r10, r8										@ r4 = (t0 * cosx) >> 16
     62 	smulwt	r3, r11, r8										@ r3 = (t1 * cosx) >> 16
     63 
     64 	smlawb	r2, r11, r8, r4									@ r2 = r4 + ((t1 * sinx) >> 16)
     65 	smulwb	r5, r10, r8										@ r5 = (t0 * sinx) >> 16
     66 
     67 	mov			r10, r0, asr #2								@ t0 = A[0] >> 2
     68 	mov			r11, r1, asr #2								@ t1 = A[1] >> 2
     69 
     70 	sub			r3, r3, r5									@ r3 = cosx*B[1] - sinx*B[0] (scaled products)
     71 	add     r14, r14, r9 	 								@ xptr -> C (A + 2*step)
     72 
     73 	sub			r0, r10, r2									@ r0 = t0 - r2
     74 	sub			r1, r11, r3									@ r1 = t1 - r3
     75 
     76 	add			r2, r10, r2									@ r2 = t0 + r2
     77 	add			r3, r11, r3									@ r3 = t1 + r3
     78 
     79 	str			r2, [sp, #24]								@ spill r2: the register is reused as scratch below
     80 	str			r3, [sp, #28]								@ spill r3: the register is reused as scratch below
     81 
     82 	ldrd		r10, [r14, #0]								@ t0 = r10 = C[0], t1 = r11 = C[1]
     83 	ldr			r8, [r12], #4								@ r8 = 2nd twiddle word; csptr += 4 bytes
     84 
     85 	smulwt	r6, r10, r8										@ r6 = (t0 * cosx) >> 16
     86 	smulwt	r5, r11, r8										@ r5 = (t1 * cosx) >> 16
     87 
     88 	smlawb	r4, r11, r8, r6									@ r4 = r6 + ((t1 * sinx) >> 16)
     89 	smulwb	r7, r10, r8										@ r7 = (t0 * sinx) >> 16
     90 
     91 	add			r14, r14, r9								@ xptr -> D (A + 3*step)
     92 	sub			r5, r5, r7									@ r5 = cosx*C[1] - sinx*C[0] (scaled products)
     93 
     94 	ldrd		r10, [r14]									@ t0 = r10 = D[0], t1 = r11 = D[1]
     95 	ldr			r8, [r12], #4								@ r8 = 3rd twiddle word; csptr += 4 bytes
     96 
     97 	smulwt	r2, r10, r8										@ r2 = (t0 * cosx) >> 16
     98 	smulwt	r7, r11, r8										@ r7 = (t1 * cosx) >> 16
     99 
    100 	smlawb	r6, r11, r8, r2									@ r6 = r2 + ((t1 * sinx) >> 16)
    101 	smulwb	r3, r10, r8										@ r3 = (t0 * sinx) >> 16
    102 
    103 	mov			r10, r4										@ t0 = r4 (rotated C, first word)
    104 	mov			r11, r5										@ t1 = r5 (rotated C, second word)
    105 
    106 	sub			r7, r7, r3									@ r7 = cosx*D[1] - sinx*D[0] (scaled products)
    107 
    108 
    109 	add			r4,  r10, r6								@ r4 = t0 + r6
    110 	sub			r5, r7, r11									@ r5 = r7 - t1
    111 
    112 	sub			r6, r10, r6									@ r6 = t0 - r6
    113 	add			r7, r7, r11									@ r7 = r7 + t1
    114 
    115 	ldr			r2, [sp, #24]								@ reload spilled r2
    116 	ldr			r3, [sp, #28]								@ reload spilled r3
    117 
    118 	add			r10, r0, r5									@ D[0] = r0 + r5
    119 	add			r11, r1, r6									@ D[1] = r1 + r6
    120 
    121 	strd		r10, [r14]									@ store the pair at D
    122 	sub			r14, r14, r9								@ xptr -> C
    123 
    124 	sub			r10, r2, r4									@ C[0] = r2 - r4
    125 	sub			r11, r3, r7									@ C[1] = r3 - r7
    126 
    127 	strd		r10, [r14]									@ store the pair at C
    128 	sub			r14, r14, r9								@ xptr -> B
    129 
    130 	sub			r10, r0, r5									@ B[0] = r0 - r5
    131 	sub			r11, r1, r6									@ B[1] = r1 - r6
    132 
    133 	strd		r10, [r14]									@ store the pair at B
    134 	sub			r14, r14, r9								@ xptr -> A
    135 
    136 	add			r10, r2, r4									@ A[0] = r2 + r4
    137 	add			r11, r3, r7									@ A[1] = r3 + r7
    138 
    139 	strd		r10, [r14]									@ store the pair at A
    140 	add			r14, r14, #8								@ xptr += 2 words (next pair)
    141 
    142 	ldr			r11, [sp, #20]								@ reload j
    143 	subs		r11, r11, #1								@ j--
    144 	bne			Radix4FFT_LOOP3								@ next butterfly (j is re-saved at the loop top)
    145 
    146 Radix4FFT_LOOP2_END:
    147 	ldr			r10, [sp, #16]								@ reload i
    148 	ldr			r3, [sp, #12]								@ reload twidTab
    149 	ldr			r2, [sp, #8]								@ reload bgn
    150 	rsb			r8, r9, r9, lsl #2							@ r8 = 4*step - step = 3*step (bytes)
    151 	sub			r10, r10, #1								@ i--
    152 	add			r14, r14, r8								@ xptr += 3*step: skip to the next group
    153 	cmp			r10, #0
    154 	bhi     Radix4FFT_LOOP2								@ unsigned i > 0, i.e. i != 0
    155 
    156 Radix4FFT_LOOP1_END:
    157 	ldr     r0, [sp]									@ reload buf
    158 	ldr		r1, [sp, #4]								@ reload num
    159 	add     r3, r3, r8, asr #1							@ twidTab += (3*step)/2 bytes = 3*bgn words
    160 	mov     r2, r2, lsl #2								@ bgn <<= 2
    161 	movs    r1, r1, asr #2								@ num >>= 2, setting flags
    162 	bne     Radix4FFT_LOOP1								@ run another pass while num != 0
    163 
    164 Radix4FFT_END:
    165 	add     sp, sp, #32									@ release the local frame
    166 	ldmia   sp!, {r4 - r11, pc}							@ restore r4-r11 and return (saved lr -> pc)
    167 
    168 	@ENDP  @ |Radix4FFT|
    169 	.end
    170