@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		R4R8First_v7.s
@
@	Content:	Radix8First and Radix4First functions, ARMv7/NEON assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

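@ Overview (annotation): both routines below perform the first pass of a
@ fixed-point FFT over interleaved complex data (buf[2k] = Re, buf[2k+1] = Im,
@ 32 bits each). From the register usage, the assumed C prototypes are:
@
@	void Radix8First(int *buf, int num);	@ r0 = buf, r1 = butterfly count
@	void Radix4First(int *buf, int num);	@ r0 = buf, r1 = butterfly count
@
@ Each loop iteration loads one block, computes the radix-8 (or radix-4)
@ butterfly with NEON, and stores the results back in place.
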
	.section .text
	.global	Radix8First
	.fnstart

Radix8First:
	stmdb     		sp!, {r4 - r11, lr}
	.save	  		{r4 - r11, lr}
	fstmfdd   		sp!, {d8 - d15}
	.vsave	  		{d8 - d15}
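	@ AAPCS prologue: r4-r11/lr and d8-d15 are callee-saved; the loop body
	@ overwrites d8-d15, so they are saved here and restored on exit.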

	ldr       		r3, SQRT1_2
	cmp       		r1, #0

	VDUP.I32  		Q15, r3
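	@ Q15 now holds the sqrt(1/2) constant (literal at the end of the function)
	@ in all four lanes; it feeds the VQDMULH rotation inside the loop.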
	beq       		Radix8First_END

Radix8First_LOOP:
	VLD1.I32		{d0, d1, d2, d3},	[r0]!
	VLD1.I32		{d8, d9, d10, d11},	[r0]!
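	@ Each iteration consumes 16 words (8 complex values, 64 bytes); the
	@ post-increment leaves r0 one block ahead until the rewind below.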

	VADD.S32		d4, d0, d1		@ r0 = buf[0] + buf[2]@ i0 = buf[1] + buf[3]@
	VSUB.S32		d5, d0, d1		@ r1 = buf[0] - buf[2]@ i1 = buf[1] - buf[3]@
	VSUB.S32		d7, d2, d3		@ r2 = buf[4] - buf[6]@ i2 = buf[5] - buf[7]@
	VADD.S32		d6, d2, d3		@ r3 = buf[4] + buf[6]@ i3 = buf[5] + buf[7]@
	VREV64.I32		d7, d7
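	@ VREV64 swaps the re/im lanes of d7 so the quadword add/sub below can
	@ form the cross terms (r +/- i combinations) without scalar shuffles.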

	VADD.S32		Q0, Q2, Q3		@ r4 = (r0 + r2)@ i4 = (i0 + i2)@ i6 = (i1 + r3)@ r7 = (r1 + i3)@
	VSUB.S32		Q1, Q2, Q3		@ r5 = (r0 - r2)@ i5 = (i0 - i2)@ r6 = (r1 - i3)@ i7 = (i1 - r3)@

	VREV64.I32		d3, d3

	VADD.S32		d4, d8, d9		@ r0 = buf[ 8] + buf[10]@ i0 = buf[ 9] + buf[11]@
	VSUB.S32		d7, d10, d11		@ r1 = buf[12] - buf[14]@ i1 = buf[13] - buf[15]@
	VADD.S32		d6, d10, d11		@ r2 = buf[12] + buf[14]@ i2 = buf[13] + buf[15]@
	VREV64.I32		d7, d7
	VSUB.S32		d5, d8, d9		@ r3 = buf[ 8] - buf[10]@ i3 = buf[ 9] - buf[11]@

	VTRN.32			d1, d3

	VADD.S32		Q4, Q2, Q3		@ t0 = (r0 + r2) >> 1@ t1 = (i0 + i2) >> 1@ i0 = i1 + r3@ r2 = r1 + i3@
	VSUB.S32		Q5, Q2, Q3		@ t2 = (r0 - r2) >> 1@ t3 = (i0 - i2) >> 1@ r0 = r1 - i3@ i2 = i1 - r3@

	VREV64.I32		d3, d3

	VSHR.S32		d8, d8, #1
	VSHR.S32		Q0, Q0, #1
	VREV64.I32		d10, d10
	VTRN.32			d11, d9
	VSHR.S32		Q1, Q1, #1
	VSHR.S32		d10, d10, #1
	VREV64.I32		d9, d9

	sub       		r0, r0, #0x40
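	@ The two loads above advanced r0 by 0x40 = 64 bytes; rewind it so the two
	@ VST1 at the end of the loop write the results back over the same block.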

	VADD.S32		d12, d0, d8
	VSUB.S32		d16, d0, d8
	VADD.S32		d14, d2, d10
	VSUB.S32		d18, d2, d10

	VSUB.S32		d4, d11, d9
	VADD.S32		d5, d11, d9

	VREV64.I32		d18, d18

	VQDMULH.S32		Q3, Q2, Q15
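	@ Doubling saturating multiply returning the high half: applies the
	@ sqrt(1/2) (45-degree) twiddle of the radix-8 pass to the cross terms
	@ held in Q2.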
	VTRN.32			d14, d18
	VTRN.32			d6, d7
	VREV64.I32		d18, d18

	VSUB.S32		d15, d3, d6
	VREV64.I32		d7, d7
	VADD.S32		d19, d3, d6
	VADD.S32		d13, d1, d7
	VSUB.S32		d17, d1, d7

	VREV64.I32		d17, d17
	VTRN.32			d13, d17
	VREV64.I32		d17, d17

	subs       		r1, r1, #1

	VST1.I32		{d12, d13, d14, d15}, [r0]!
	VST1.I32		{d16, d17, d18, d19}, [r0]!
	bne       		Radix8First_LOOP

Radix8First_END:
	fldmfdd   		sp!, {d8 - d15}
	ldmia     		sp!, {r4 - r11, pc}
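	@ Reading of the constant (not documented in the original source):
	@ 0x2d413ccd = round(2^31 * sqrt(2)/4), so the VQDMULH rotation is applied
	@ at half scale, matching the >>1 scaling of the other outputs.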
SQRT1_2:
	.word      0x2d413ccd

	@ENDP  @ |Radix8First|
	.fnend

	.section .text
	.global	Radix4First
	.fnstart

Radix4First:
	stmdb     	sp!, {r4 - r11, lr}
	.save	  	{r4 - r11, lr}
	fstmfdd   	sp!, {d8 - d15}
	.vsave	  	{d8 - d15}
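	@ Same (assumed) calling convention as Radix8First: r0 = interleaved
	@ complex buffer, r1 = number of radix-4 butterflies.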

	cmp       	r1, #0
	beq       	Radix4First_END

Radix4First_LOOP:
	VLD1.I32		{d0, d1, d2, d3}, [r0]
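	@ Note: no writeback on this load; the post-incrementing VST1 below stores
	@ the four complex results back over the same 32-byte block.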

	VADD.S32		d4, d0, d1		@ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
	VSUB.S32		d5, d0, d1		@ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
	VSUB.S32		d7, d2, d3		@ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
	VADD.S32		d6, d2, d3		@ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@

	VREV64.I32		d7, d7

	VADD.S32		Q4, Q2, Q3
	VSUB.S32		Q5, Q2, Q3
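	@ Q4/Q5 hold the butterfly sums/differences; the VREV64/VTRN pair below
	@ restores (re, im) lane order before the in-place store.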

	VREV64.I32		d11, d11
	VTRN.32			d9, d11
	subs       		r1, r1, #1
	VREV64.I32		d11, d11
	VST1.I32		{d8, d9, d10, d11}, [r0]!

	bne       		Radix4First_LOOP

Radix4First_END:
	fldmfdd   		sp!, {d8 - d15}
	ldmia    		sp!, {r4 - r11, pc}

	@ENDP  @ |Radix4First|
	.fnend