; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s

; Exercises insertelement followed by a store to addrspace(1), using both
; constant lane indices and a run-time lane index (%b), on float and integer
; vectors of several element types and lengths.
;
; Only directives spelled with the exact prefix given to FileCheck above are
; enforced. Lines spelled FIXMESI, and expectation lines with no prefix at all,
; are inert and merely record the intended output.

; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
; individual elements instead of 128-bit stores.


; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?

; NOTE(review): the five expectation lines below carry no check prefix, so
; FileCheck ignores them. The literal '[' is written as {{\[}} (the same form
; the enabled checks in this file use) so the patterns are valid regular
; expressions should they ever be enabled.

; SI-LABEL: @insertelement_v4f32_0:
; S_LOAD_DWORDX4 s{{\[}}[[LOW_REG:[0-9]+]]:
; V_MOV_B32_e32
; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
; BUFFER_STORE_DWORDX4 v{{\[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4f32_1:
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4f32_2:
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4f32_3:
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4i32_0:
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 999, i32 0
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Dynamic-index float cases: the lane is selected by the run-time value %b.

; SI-LABEL: @dynamic_insertelement_v2f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
  store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v8f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
  store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; SI-LABEL: @dynamic_insertelement_v16f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
  store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; Dynamic-index i32 cases.

; SI-LABEL: @dynamic_insertelement_v2i32:
; SI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
  store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4i32:
; SI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v8i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
  store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; SI-LABEL: @dynamic_insertelement_v16i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
  store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
  ret void
}

; Dynamic-index i16 cases. Only the function label is enforced; the store
; expectations are spelled FIXMESI and therefore inactive.

; SI-LABEL: @dynamic_insertelement_v2i16:
; FIXMESI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
  store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4i16:
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
  store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
  ret void
}

; Dynamic-index i8 cases. As with i16, only the function label is enforced.

; SI-LABEL: @dynamic_insertelement_v2i8:
; FIXMESI: BUFFER_STORE_USHORT
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
  store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4i8:
; FIXMESI: BUFFER_STORE_DWORD
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
  store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v8i8:
; FIXMESI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
  store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v16i8:
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
  store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
  ret void
}

; This test requires handling INSERT_SUBREG in SIFixSGPRCopies. Check that
; the compiler doesn't crash.
; SI-LABEL: @insert_split_bb
; Lane 0 of a <2 x i32> is filled from %a in the entry block; lane 1 is filled
; in one of two successor blocks, and the two results are merged by a phi
; before the store. There are no instruction-level expectations: only the
; function label is checked.
define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
entry:
  ; Partially built vector shared by both arms of the branch.
  %0 = insertelement <2 x i32> undef, i32 %a, i32 0
  %1 = icmp eq i32 %a, 0
  br i1 %1, label %if, label %else

if:
  ; %a == 0: lane 1 comes from the first i32 at %in.
  %2 = load i32 addrspace(1)* %in
  %3 = insertelement <2 x i32> %0, i32 %2, i32 1
  br label %endif

else:
  ; %a != 0: lane 1 comes from the second i32 at %in (element index 1).
  %4 = getelementptr i32 addrspace(1)* %in, i32 1
  %5 = load i32 addrspace(1)* %4
  %6 = insertelement <2 x i32> %0, i32 %5, i32 1
  br label %endif

endif:
  ; Merge the two fully built vectors and write the result to %out.
  %7 = phi <2 x i32> [%3, %if], [%6, %else]
  store <2 x i32> %7, <2 x i32> addrspace(1)* %out
  ret void
}