; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Instruction-selection checks for 32-bit atomicrmw operations on
; addrspace(3) pointers whose returned value is stored through an
; addrspace(1) pointer.
;
; Every operation is exercised twice: once directly on %ptr, and once through
; a getelementptr of 4 x i32 (16 bytes), which the checks expect to show up as
; the 0x10 offset operand of the DS instruction.
;
; Both check prefixes are enabled on the command line above. The label checks
; anchor on the function's own label at the start of a line, so each group of
; checks is confined to the output of the function it precedes and `foo` can
; never be confused with `foo_offset`.

; xchg with the constant 4 and no address offset. This case pins the whole
; sequence: pointer load, constant materialization, copy of the pointer into a
; v register, the DS operation, and the store of its result.
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Same operation through a 16-byte getelementptr; only the opcode and the
; 0x10 offset operand are checked.
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; add with the constant 4 and no address offset; full sequence is checked.
; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; add of the constant 1 is expected to select the increment opcode, with -1
; materialized in an s register and copied to a v register as its data operand.
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; sub with the constant 4; only the opcode is checked for the no-offset case.
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
; SI: DS_SUB_RTN_U32
; SI: S_ENDPGM
define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; sub of the constant 1 is expected to select the decrement opcode, again with
; -1 as its data operand.
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Bitwise operations (and / or / xor) with the constant 4.
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
; SI: DS_AND_RTN_B32
; SI: S_ENDPGM
define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
; SI: DS_OR_RTN_B32
; SI: S_ENDPGM
define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
; SI: DS_XOR_RTN_B32
; SI: S_ENDPGM
define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; The test below stays disabled (note the X in front of the label directive)
; until that expansion exists.
; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   store i32 %result, i32 addrspace(1)* %out, align 4
;   ret void
; }

; Signed min / max are expected to select the _I32 opcodes.
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
; SI: DS_MIN_RTN_I32
; SI: S_ENDPGM
define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
; SI: DS_MAX_RTN_I32
; SI: S_ENDPGM
define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned umin / umax are expected to select the _U32 opcodes.
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
; SI: DS_MIN_RTN_U32
; SI: S_ENDPGM
define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
; SI: DS_MAX_RTN_U32
; SI: S_ENDPGM
define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}