Home | History | Annotate | Download | only in opts
      1 /***************************************************************************
      2  Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.
      3 
      4  Licensed under the Apache License, Version 2.0 (the "License"); you
      5  may not use this file except in compliance with the License.  You may
      6  obtain a copy of the License at
      7 
      8  http://www.apache.org/licenses/LICENSE-2.0
      9 
     10  Unless required by applicable law or agreed to in writing, software
     11  distributed under the License is distributed on an "AS IS" BASIS,
     12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
     13  implied.  See the License for the specific language governing
     14  permissions and limitations under the License.
     15  ***************************************************************************/
     16 
     17 	.code 32
     18 	.fpu neon
     19 	.align 4
     20 	.globl	memset32_neon
     21 	.func
     22 
     23 	/* r0 = buffer, r1 = value, r2 = times to write */
     24 memset32_neon:
     25 	cmp		r2, #1
     26 	streq		r1, [r0], #4
     27 	bxeq		lr
     28 
     29 	cmp		r2, #4
     30 	bgt		memset32_neon_start
     31 	cmp		r2, #0
     32 	bxeq		lr
     33 memset32_neon_small:
     34 	str		r1, [r0], #4
     35 	subs		r2, r2, #1
     36 	bne		memset32_neon_small
     37 	bx		lr
     38 memset32_neon_start:
     39 	cmp		r2, #16
     40 	blt		memset32_dropthru
     41 	vdup.32		q0, r1
     42 	vmov		q1, q0
     43 	cmp		r2, #32
     44 	blt		memset32_16
     45 	cmp		r2, #64
     46 	blt		memset32_32
     47 	cmp		r2, #128
     48 	blt		memset32_64
     49 memset32_128:
     50 	movs		r12, r2, lsr #7
     51 memset32_loop128:
     52 	subs		r12, r12, #1
     53 	vst1.64		{q0, q1}, [r0]!
     54 	vst1.64		{q0, q1}, [r0]!
     55 	vst1.64		{q0, q1}, [r0]!
     56 	vst1.64		{q0, q1}, [r0]!
     57 	vst1.64		{q0, q1}, [r0]!
     58 	vst1.64		{q0, q1}, [r0]!
     59 	vst1.64		{q0, q1}, [r0]!
     60 	vst1.64		{q0, q1}, [r0]!
     61 	vst1.64		{q0, q1}, [r0]!
     62 	vst1.64		{q0, q1}, [r0]!
     63 	vst1.64		{q0, q1}, [r0]!
     64 	vst1.64		{q0, q1}, [r0]!
     65 	vst1.64		{q0, q1}, [r0]!
     66 	vst1.64		{q0, q1}, [r0]!
     67 	vst1.64		{q0, q1}, [r0]!
     68 	vst1.64		{q0, q1}, [r0]!
     69 	bne		memset32_loop128
     70 	ands		r2, r2, #0x7f
     71 	bxeq		lr
     72 memset32_64:
     73 	movs		r12, r2, lsr #6
     74 	beq		memset32_32
     75 	vst1.64		{q0, q1}, [r0]!
     76 	vst1.64		{q0, q1}, [r0]!
     77 	vst1.64		{q0, q1}, [r0]!
     78 	vst1.64		{q0, q1}, [r0]!
     79 	vst1.64		{q0, q1}, [r0]!
     80 	vst1.64		{q0, q1}, [r0]!
     81 	vst1.64		{q0, q1}, [r0]!
     82 	vst1.64		{q0, q1}, [r0]!
     83 	ands		r2, r2, #0x3f
     84 	bxeq		lr
     85 memset32_32:
     86 	movs		r12, r2, lsr #5
     87 	beq		memset32_16
     88 	vst1.64		{q0, q1}, [r0]!
     89 	vst1.64		{q0, q1}, [r0]!
     90 	vst1.64		{q0, q1}, [r0]!
     91 	vst1.64		{q0, q1}, [r0]!
     92 	ands		r2, r2, #0x1f
     93 	bxeq		lr
     94 memset32_16:
     95 	movs		r12, r2, lsr #4
     96 	beq		memset32_dropthru
     97 	and		r2, r2, #0xf
     98 	vst1.64		{q0, q1}, [r0]!
     99 	vst1.64		{q0, q1}, [r0]!
    100 memset32_dropthru:
    101 	rsb		r2, r2, #15
    102 	add		pc, pc, r2, lsl #2
    103 	nop
    104 	str		r1, [r0, #56]
    105 	str		r1, [r0, #52]
    106 	str		r1, [r0, #48]
    107 	str		r1, [r0, #44]
    108 	str		r1, [r0, #40]
    109 	str		r1, [r0, #36]
    110 	str		r1, [r0, #32]
    111 	str		r1, [r0, #28]
    112 	str		r1, [r0, #24]
    113 	str		r1, [r0, #20]
    114 	str		r1, [r0, #16]
    115 	str		r1, [r0, #12]
    116 	str		r1, [r0, #8]
    117 	str		r1, [r0, #4]
    118 	str		r1, [r0, #0]
    119 	bx		lr
    120 
    121 	.endfunc
    122 	.end
    123