Home | History | Annotate | Download | only in lib
      1 /* memset.S: optimised assembly memset
      2  *
      3  * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
      4  * Written by David Howells (dhowells (at) redhat.com)
      5  *
      6  * This program is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU General Public License
      8  * as published by the Free Software Foundation; either version
      9  * 2 of the License, or (at your option) any later version.
     10  */
     11 
     12 
     13         .text
     14         .p2align	4
     15 
     16 ###############################################################################
     17 #
     18 # void *memset(void *p, char ch, size_t count)
     19 #
        # Fill count bytes starting at p with the low 8 bits of ch.
        #
        # Register usage, as read from the code below:
        #   in:   GR8 = p, GR9 = ch, GR10 = count
        #   out:  GR8 is never written here, so it still holds p on return
        #   GR4       = address tracking pointer
        #   GR5       = number of bytes still to be written
        #   GR6       = size of the chunk about to be subtracted from GR5
        #   GR7       = index register (step value) for the store insns
        #   GR12/GR13 = fill byte replicated across both registers
        #   ICC3      = zero-ness of GR5; every "beqlr icc3" is an early return
        #   ICC0/ICC1 = results of "is GR5 at least N" / alignment-bit tests
        #   CC5/CC7   = predicates guarding the conditional (c*) insns
        #
     20 # - NOTE: must not use any stack. exception detection performs function return
     21 #         to caller's fixup routine, aborting the remainder of the set
     22 #         GR4, GR7, GR8, and GR11 must be managed
        #
        # - __memset_user_error_handler computes the faulting position as GR4 + GR7
        #   and the end of the buffer as GR8 + GR10.  So at every store insn in
        #   this routine GR4 + GR7 must equal the address being written, and
        #   GR8, GR10 and GR11 must be left exactly as the caller supplied them.
     23 #
     24 ###############################################################################
     25         .globl		memset,__memset_end
     26         .type		memset,@function
     27 memset:
        	# take working copies of the arguments and return at once if count == 0
     28 	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
     29 	andi		gr9,#0xff,gr9
     30 	or.p		gr8,gr0,gr4			; GR4 = address
     31 	beqlr		icc3,#0
        	# The alignment stores below use GR0 as their index and GR7 is not
        	# otherwise written until the 8-byte store loop is set up.  Zero it
        	# here so that GR4 + GR7 is the real store address if one of those
        	# stores faults; without this the fault handler would add whatever
        	# value the caller happened to leave in GR7.
        	setlos		#0,gr7
     32 
     33 	# conditionally write a byte to 2b-align the address
        	# CC7 = (address & 1) != 0; count is known to be >= 1 here
     34 	setlos.p	#1,gr6
     35 	andicc		gr4,#1,gr0,icc0
     36 	ckne		icc0,cc7
     37 	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
     38 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
     39 	cadd.p		gr4,gr6,gr4		,cc7,#1
     40 	beqlr		icc3,#0
     41 
     42 	# conditionally write a 2-byte halfword to 4b-align the address
        	# CC7 = (address & 2) != 0  AND  CC5 = (count >= 2, i.e. no borrow
        	# from count - 2).  GR12 = fill byte in both of its low two bytes.
     43 	andicc.p	gr4,#2,gr0,icc0
     44 	subicc		gr5,#2,gr0,icc1
     45 	setlos.p	#2,gr6
     46 	ckne		icc0,cc7
     47 	slli.p		gr9,#8,gr12			; need to double up the pattern
     48 	cknc		icc1,cc5
     49 	or.p		gr9,gr12,gr12
     50 	andcr		cc7,cc5,cc7
     51 
     52 	csth.p		gr12,@(gr4,gr0)		,cc7,#1
     53 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
     54 	cadd.p		gr4,gr6,gr4		,cc7,#1
     55 	beqlr		icc3,#0
     56 
     57 	# conditionally write a 4-byte word to 8b-align the address
        	# CC7 = (address & 4) != 0  AND  CC5 = (count >= 4).
        	# GR12 = fill byte in all four of its bytes.
     58 	andicc.p	gr4,#4,gr0,icc0
     59 	subicc		gr5,#4,gr0,icc1
     60 	setlos.p	#4,gr6
     61 	ckne		icc0,cc7
     62 	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
     63 	cknc		icc1,cc5
     64 	or.p		gr13,gr12,gr12
     65 	andcr		cc7,cc5,cc7
     66 
     67 	cst.p		gr12,@(gr4,gr0)		,cc7,#1
     68 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
     69 	cadd.p		gr4,gr6,gr4		,cc7,#1
     70 	beqlr		icc3,#0
     71 
        	# GR13 = GR12, so the double-word stores below write the pattern
        	# from the GR12/GR13 pair
     72 	or.p		gr12,gr12,gr13			; need to octuple-up the pattern
     73 
     74 	# the address is now 8b-aligned - loop around writing 64-byte chunks
        	# (if fewer than 8 bytes remain the address may be less aligned, but
        	# then none of the 8-byte stores below is enabled)
        	#
        	# From here on the stores are of the update form with GR7 as index:
        	# each one writes at GR4 + GR7 and leaves that address in GR4.  GR4 is
        	# therefore biased back by 8 so that the first store lands on the
        	# next unwritten byte.
     75 	setlos		#8,gr7
     76 	subi.p		gr4,#8,gr4			; store with update index does weird stuff
     77 	setlos		#64,gr6
     78 
        	# CC7 = (count >= 64).  The count update and the test for the next
        	# iteration are interleaved with the stores; the subicc must stay
        	# after the csubcc so that it sees the decremented GR5.
     79 	subicc		gr5,#64,gr0,icc0
     80 0:	cknc		icc0,cc7
     81 	cstdu		gr12,@(gr4,gr7)		,cc7,#1
     82 	cstdu		gr12,@(gr4,gr7)		,cc7,#1
     83 	cstdu		gr12,@(gr4,gr7)		,cc7,#1
     84 	cstdu		gr12,@(gr4,gr7)		,cc7,#1
     85 	cstdu		gr12,@(gr4,gr7)		,cc7,#1
     86 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
     87 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
     88 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
     89 	subicc		gr5,#64,gr0,icc0
     90 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
     91 	beqlr		icc3,#0
     92 	bnc		icc0,#2,0b
     93 
     94 	# now do 32-byte remnant
        	# CC7 = (count >= 32); four 8-byte stores.  GR6 is reloaded with 16
        	# and ICC0 recomputed against 16 ready for the next section.
     95 	subicc.p	gr5,#32,gr0,icc0
     96 	setlos		#32,gr6
     97 	cknc		icc0,cc7
     98 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
     99 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
    100 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
    101 	setlos		#16,gr6
    102 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
    103 	subicc		gr5,#16,gr0,icc0
    104 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
    105 	beqlr		icc3,#0
    106 
    107 	# now do 16-byte remnant
        	# CC7 = (count >= 16), from the ICC0 computed in the section above
    108 	cknc		icc0,cc7
    109 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
    110 	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
    111 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
    112 	beqlr		icc3,#0
    113 
    114 	# now do 8-byte remnant
        	# CC7 = (count >= 8).  GR7 (= 8) doubles as the amount subtracted
        	# from the count, and is only changed to 4 after that subtraction.
    115 	subicc		gr5,#8,gr0,icc1
    116 	cknc		icc1,cc7
    117 	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
    118 	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
    119 	setlos.p	#4,gr7
    120 	beqlr		icc3,#0
    121 
    122 	# now do 4-byte remnant
        	# GR4 trails the next unwritten byte by 8; advance it by 4 so that
        	# GR4 + GR7 (GR7 = 4) addresses that byte.  CC7 = (count >= 4).
    123 	subicc		gr5,#4,gr0,icc0
    124 	addi.p		gr4,#4,gr4
    125 	cknc		icc0,cc7
    126 	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
    127 	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
    128 	subicc.p	gr5,#2,gr0,icc1
    129 	beqlr		icc3,#0
    130 
    131 	# now do 2-byte remnant
        	# GR4 trails the next unwritten byte by 4 whether or not the word
        	# store above executed; advance it by 2 to match GR7 = 2.
        	# CC7 = (count >= 2), from the ICC1 computed just above.
    132 	setlos		#2,gr7
    133 	addi.p		gr4,#2,gr4
    134 	cknc		icc1,cc7
    135 	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
    136 	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
    137 	subicc.p	gr5,#1,gr0,icc0
    138 	beqlr		icc3,#0
    139 
    140 	# now do 1-byte remnant
        	# GR4 trails the next unwritten byte by 2 either way; advance it by
        	# 2 and zero GR7 so that GR4 + GR7 is the byte's own address.
        	# CC7 = (count >= 1).  The store is packed with the final return.
    141 	setlos		#0,gr7
    142 	addi.p		gr4,#2,gr4
    143 	cknc		icc0,cc7
    144 	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
    145 	bralr
        # __memset_end marks the first address past memset's code and is exported
        # by the .globl above.
        # NOTE(review): presumably used by fault-fixup code outside this file to
        # range-check a faulting PC - confirm against that code.
    146 __memset_end:
    147 
    148 	.size		memset, __memset_end-memset
    149 
    150 ###############################################################################
    151 #
    152 # clear memory in userspace
    153 # - return the number of bytes that could not be cleared (0 on complete success)
    154 #
    155 # long __memset_user(void *p, size_t count)
    156 #
        # Register usage, as read from the code below:
        #   in:   GR8 = p, GR9 = count
        #   out:  GR8 = 0 on the normal return path, or the number of bytes left
        #         uncleared when __memset_user_error_handler is entered
        #   GR11  = caller's return address, saved from LR before the call
        #   GR10  = count, as passed on to memset; GR9 is zeroed as the fill byte
        #
        # Three global labels are defined here:
        #   __memset_user               - the entry point
        #   __memset_user_error_lr      - the return address of the call to memset
        #   __memset_user_error_handler - entered with memset's registers intact
        #                                 if a store inside memset faults
        # NOTE(review): the code that redirects a fault to the handler is not in
        # this file; presumably it recognises the case by LR being equal to
        # __memset_user_error_lr - confirm against the exception-table code.
        #
    157 ###############################################################################
    158         .globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
    159         .type		__memset_user,@function
    160 __memset_user:
        	# keep the caller's return address in GR11, since the call below
        	# overwrites LR and no stack is used
    161 	movsg		lr,gr11
    162 
    163 	# abuse memset to do the dirty work
        	# memset takes (p, ch, count) in GR8, GR9, GR10: move count from GR9
        	# to GR10, then set the fill byte in GR9 to zero
    164 	or.p		gr9,gr9,gr10
    165 	setlos		#0,gr9
    166 	call		memset
    167 __memset_user_error_lr:
        	# normal completion: return to the saved address with GR8 = 0
        	# (nothing left uncleared)
    168 	jmpl.p		@(gr11,gr0)
    169 	setlos		#0,gr8
    170 
    171 	# deal with any exception generated by memset
    172 	# GR4  - memset's address tracking pointer
    173 	# GR7  - memset's step value (index register for store insns)
    174 	# GR8  - memset's original start address
    175 	# GR10 - memset's original count
        	#
        	# GR4 + GR7 is taken as the position at which the set stopped and
        	# GR8 + GR10 as the end of the buffer; the difference is returned in
        	# GR8.  This is only correct if GR4 + GR7 is the address of the store
        	# that faulted and GR8/GR10/GR11 are unchanged since the call.
    176 __memset_user_error_handler:
    177 	add.p		gr4,gr7,gr4
    178 	add		gr8,gr10,gr8
    179 	jmpl.p		@(gr11,gr0)
    180 	sub		gr8,gr4,gr8		; we return the amount left uncleared
    181 
    182 	.size		__memset_user, .-__memset_user
    183