1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2 ; FIXME: Manually added checks for metadata nodes at bottom 3 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=HSA %s 4 ; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s 5 6 define amdgpu_kernel void @kern_noargs() { 7 ; HSA-LABEL: @kern_noargs( 8 ; HSA-NEXT: ret void 9 ; 10 ; MESA-LABEL: @kern_noargs( 11 ; MESA-NEXT: ret void 12 ; 13 ret void 14 } 15 16 define amdgpu_kernel void @kern_i8(i8 %arg) #0 { 17 ; HSA-LABEL: @kern_i8( 18 ; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 19 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0 20 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 21 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 22 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 23 ; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 24 ; HSA-NEXT: ret void 25 ; 26 ; MESA-LABEL: @kern_i8( 27 ; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 28 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36 29 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 30 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 31 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 32 ; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 33 ; MESA-NEXT: ret void 34 ; 35 store i8 %arg, i8 addrspace(1)* undef, align 1 36 ret void 37 } 38 39 define amdgpu_kernel void @kern_i16(i16 %arg) #0 { 40 ; HSA-LABEL: @kern_i16( 41 ; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 42 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0 43 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 44 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 45 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 46 ; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 47 ; HSA-NEXT: ret void 48 ; 49 ; MESA-LABEL: @kern_i16( 50 ; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 51 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36 52 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 53 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 54 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 55 ; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 56 ; MESA-NEXT: ret void 57 ; 58 store i16 %arg, i16 addrspace(1)* undef, align 1 59 ret void 60 } 61 62 define amdgpu_kernel void @kern_f16(half %arg) #0 { 63 ; HSA-LABEL: @kern_f16( 64 ; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 65 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0 66 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 67 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 68 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 69 ; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 70 ; HSA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1 71 ; HSA-NEXT: ret void 72 ; 73 ; MESA-LABEL: @kern_f16( 74 ; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 75 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36 76 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 77 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 78 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 79 ; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 80 ; MESA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1 81 ; MESA-NEXT: ret void 82 ; 83 store half %arg, half addrspace(1)* undef, align 1 84 ret void 85 } 86 87 define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 { 88 ; HSA-LABEL: @kern_zeroext_i8( 89 ; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 90 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0 91 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 92 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 93 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 94 ; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 95 ; HSA-NEXT: ret void 96 ; 97 ; MESA-LABEL: @kern_zeroext_i8( 98 ; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 99 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36 100 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 101 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 102 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 103 ; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 104 ; MESA-NEXT: ret void 105 ; 106 store i8 %arg, i8 addrspace(1)* undef, align 1 107 ret void 108 } 109 110 define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 { 111 ; HSA-LABEL: @kern_zeroext_i16( 112 ; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 113 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0 114 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 115 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 116 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 117 ; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 118 ; HSA-NEXT: ret void 119 ; 120 ; MESA-LABEL: @kern_zeroext_i16( 121 ; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 122 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36 123 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 124 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 125 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 126 ; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 127 ; MESA-NEXT: ret void 128 ; 129 store i16 %arg, i16 addrspace(1)* undef, align 1 130 ret void 131 } 132 133 define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 { 134 ; HSA-LABEL: @kern_signext_i8( 135 ; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 136 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0 137 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 138 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 139 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 140 ; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 141 ; HSA-NEXT: ret void 142 ; 143 ; MESA-LABEL: @kern_signext_i8( 144 ; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 145 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36 146 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 147 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 148 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 149 ; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 150 ; MESA-NEXT: ret void 151 ; 152 store i8 %arg, i8 addrspace(1)* undef, align 1 153 ret void 154 } 155 156 define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 { 157 ; HSA-LABEL: @kern_signext_i16( 158 ; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 159 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0 160 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 161 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 162 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 163 ; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 164 ; HSA-NEXT: ret void 165 ; 166 ; MESA-LABEL: @kern_signext_i16( 167 ; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 168 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36 169 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 170 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 171 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 172 ; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 173 ; MESA-NEXT: ret void 174 ; 175 store i16 %arg, i16 addrspace(1)* undef, align 1 176 ret void 177 } 178 179 define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) { 180 ; HSA-LABEL: @kern_i8_i8( 181 ; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 182 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0 183 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 184 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 185 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 186 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0 187 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 188 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 189 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 190 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 191 ; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1 192 ; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1 193 ; HSA-NEXT: ret void 194 ; 195 ; MESA-LABEL: @kern_i8_i8( 196 ; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 197 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36 198 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 199 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 200 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 201 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36 202 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 203 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 204 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 205 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 206 ; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1 207 ; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1 208 ; MESA-NEXT: ret void 209 ; 210 store volatile i8 %arg0, i8 addrspace(1)* undef, align 1 211 store volatile i8 %arg1, i8 addrspace(1)* undef, align 1 212 ret void 213 } 214 215 define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) { 216 ; HSA-LABEL: @kern_v3i8( 217 ; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 218 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0 219 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 220 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 221 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 222 ; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8> 223 ; HSA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4 224 ; HSA-NEXT: ret void 225 ; 226 ; MESA-LABEL: @kern_v3i8( 227 ; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 228 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36 229 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 230 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 231 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 232 ; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8> 233 ; MESA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4 234 ; MESA-NEXT: ret void 235 ; 236 store <3 x i8> %arg, <3 x i8> addrspace(1)* undef, align 4 237 ret void 238 } 239 240 define amdgpu_kernel void @kern_i24(i24 %arg0) { 241 ; HSA-LABEL: @kern_i24( 242 ; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 243 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0 244 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 245 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 246 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 247 ; HSA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef 248 ; HSA-NEXT: ret void 249 ; 250 ; MESA-LABEL: @kern_i24( 251 ; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 252 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36 253 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 254 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 255 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 256 ; MESA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef 257 ; MESA-NEXT: ret void 258 ; 259 store i24 %arg0, i24 addrspace(1)* undef 260 ret void 261 } 262 263 define amdgpu_kernel void @kern_i32(i32 %arg0) { 264 ; HSA-LABEL: @kern_i32( 265 ; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 266 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0 267 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 268 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 269 ; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 270 ; HSA-NEXT: ret void 271 ; 272 ; MESA-LABEL: @kern_i32( 273 ; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 274 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36 275 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 276 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 277 ; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 278 ; MESA-NEXT: ret void 279 ; 280 store i32 %arg0, i32 addrspace(1)* undef 281 ret void 282 } 283 284 define amdgpu_kernel void @kern_f32(float %arg0) { 285 ; HSA-LABEL: @kern_f32( 286 ; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 287 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0 288 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)* 289 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 290 ; HSA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef 291 ; HSA-NEXT: ret void 292 ; 293 ; MESA-LABEL: @kern_f32( 294 ; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 295 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36 296 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)* 297 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 298 ; MESA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef 299 ; MESA-NEXT: ret void 300 ; 301 store float %arg0, float addrspace(1)* undef 302 ret void 303 } 304 305 define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) { 306 ; HSA-LABEL: @kern_v3i32( 307 ; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 308 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0 309 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 310 ; HSA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 311 ; HSA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0 312 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 313 ; HSA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4 314 ; HSA-NEXT: ret void 315 ; 316 ; MESA-LABEL: @kern_v3i32( 317 ; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 318 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36 319 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 320 ; MESA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 321 ; MESA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0 322 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 323 ; MESA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4 324 ; MESA-NEXT: ret void 325 ; 326 store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef, align 4 327 ret void 328 } 329 330 define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 { 331 ; HSA-LABEL: @kern_v8i32( 332 ; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 333 ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0 334 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)* 335 ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 336 ; HSA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef 337 ; HSA-NEXT: ret void 338 ; 339 ; MESA-LABEL: @kern_v8i32( 340 ; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 341 ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36 342 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)* 343 ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 344 ; MESA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef 345 ; MESA-NEXT: ret void 346 ; 347 store <8 x i32> %arg, <8 x i32> addrspace(1)* undef 348 ret void 349 } 350 351 define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 { 352 ; HSA-LABEL: @kern_v8i64( 353 ; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 354 ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0 355 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)* 356 ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 357 ; HSA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef 358 ; HSA-NEXT: ret void 359 ; 360 ; MESA-LABEL: @kern_v8i64( 361 ; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(100) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 362 ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36 363 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)* 364 ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 365 ; MESA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef 366 ; MESA-NEXT: ret void 367 ; 368 store <8 x i64> %arg, <8 x i64> addrspace(1)* undef 369 ret void 370 } 371 372 define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 { 373 ; HSA-LABEL: @kern_v16i64( 374 ; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(128) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 375 ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0 376 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)* 377 ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 378 ; HSA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef 379 ; HSA-NEXT: ret void 380 ; 381 ; MESA-LABEL: @kern_v16i64( 382 ; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(164) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 383 ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36 384 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)* 385 ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 386 ; MESA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef 387 ; MESA-NEXT: ret void 388 ; 389 store <16 x i64> %arg, <16 x i64> addrspace(1)* undef 390 ret void 391 } 392 393 define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) { 394 ; HSA-LABEL: @kern_i32_v3i32( 395 ; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 396 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0 397 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 398 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 399 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16 400 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 401 ; HSA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 402 ; HSA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0 403 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 404 ; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 405 ; HSA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4 406 ; HSA-NEXT: ret void 407 ; 408 ; MESA-LABEL: @kern_i32_v3i32( 409 ; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 410 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36 411 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 412 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 413 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52 414 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 415 ; MESA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 416 ; MESA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0 417 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 418 ; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 419 ; MESA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4 420 ; MESA-NEXT: ret void 421 ; 422 store i32 %arg0, i32 addrspace(1)* undef 423 store <3 x i32> %arg1, <3 x i32> addrspace(1)* undef, align 4 424 ret void 425 } 426 427 %struct.a = type { i32, i8, [4 x i8] } 428 %struct.b.packed = type { i8, i32, [3 x i16], <2 x double> } 429 430 define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) { 431 ; HSA-LABEL: @kern_struct_a( 432 ; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 433 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0 434 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)* 435 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 436 ; HSA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef 437 ; HSA-NEXT: ret void 438 ; 439 ; MESA-LABEL: @kern_struct_a( 440 ; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 441 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36 442 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)* 443 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 444 ; MESA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef 445 ; MESA-NEXT: ret void 446 ; 447 store %struct.a %arg0, %struct.a addrspace(1)* undef 448 ret void 449 } 450 451 define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 { 452 ; HSA-LABEL: @kern_struct_b_packed( 453 ; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 454 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0 455 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)* 456 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 457 ; HSA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef 458 ; HSA-NEXT: ret void 459 ; 460 ; MESA-LABEL: @kern_struct_b_packed( 461 ; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 462 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36 463 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)* 464 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 465 ; MESA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef 466 ; MESA-NEXT: ret void 467 ; 468 store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef 469 ret void 470 } 471 472 define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 { 473 ; HSA-LABEL: @kern_implicit_arg_num_bytes( 474 ; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 475 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0 476 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 477 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 478 ; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 479 ; HSA-NEXT: ret void 480 ; 481 ; MESA-LABEL: @kern_implicit_arg_num_bytes( 482 ; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 483 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36 484 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 485 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 486 ; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 487 ; MESA-NEXT: ret void 488 ; 489 store i32 %arg0, i32 addrspace(1)* undef 490 ret void 491 } 492 493 define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 { 494 ; HSA-LABEL: @kernel_implicitarg_no_struct_align( 495 ; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 496 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64 497 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 498 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 499 ; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 500 ; HSA-NEXT: ret void 501 ; 502 ; MESA-LABEL: @kernel_implicitarg_no_struct_align( 503 ; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 504 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100 505 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 506 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 507 ; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 508 ; MESA-NEXT: ret void 509 ; 510 store i32 %arg1, i32 addrspace(1)* undef 511 ret void 512 } 513 514 define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 { 515 ; HSA-LABEL: @kern_lds_ptr( 516 ; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 517 ; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0 518 ; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* 519 ; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 520 ; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 521 ; HSA-NEXT: ret void 522 ; 523 ; MESA-LABEL: @kern_lds_ptr( 524 ; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 525 ; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36 526 ; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* 527 ; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 528 ; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 529 ; MESA-NEXT: ret void 530 ; 531 store i32 0, i32 addrspace(3)* %lds, align 4 532 ret void 533 } 534 535 define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { 536 ; HSA-LABEL: @kern_lds_ptr_si( 537 ; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 538 ; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 539 ; HSA-NEXT: ret void 540 ; 541 ; MESA-LABEL: @kern_lds_ptr_si( 542 ; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 543 ; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 544 ; MESA-NEXT: ret void 545 ; 546 store i32 0, i32 addrspace(3)* %lds, align 4 547 ret void 548 } 549 550 define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { 551 ; HSA-LABEL: @kern_realign_i8_i8( 552 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 553 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0 554 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 555 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 556 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 557 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0 558 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 559 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 560 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 561 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 562 ; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 563 ; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 564 ; HSA-NEXT: ret void 565 ; 566 ; MESA-LABEL: @kern_realign_i8_i8( 567 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 568 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36 569 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 570 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 571 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 572 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36 573 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 574 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 575 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 576 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 577 ; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 578 ; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 579 ; MESA-NEXT: ret void 580 ; 581 store volatile i8 %arg0, i8 addrspace(1)* undef 582 store volatile i8 %arg1, i8 addrspace(1)* undef 583 ret void 584 } 585 586 define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 { 587 ; HSA-LABEL: @kern_realign_i8_i8_i8( 588 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 589 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 590 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 591 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 592 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 593 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 594 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 595 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 596 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 597 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 598 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 599 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 600 ; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 601 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 602 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 603 ; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 604 ; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 605 ; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 606 ; HSA-NEXT: ret void 607 ; 608 ; MESA-LABEL: @kern_realign_i8_i8_i8( 609 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 610 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 611 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 612 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 613 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 614 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 615 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 616 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 617 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 618 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 619 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 620 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 621 ; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 622 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 623 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 624 ; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 625 ; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 626 ; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 627 ; MESA-NEXT: ret void 628 ; 629 store volatile i8 %arg0, i8 addrspace(1)* undef 630 store volatile i8 %arg1, i8 addrspace(1)* undef 631 store volatile i8 %arg2, i8 addrspace(1)* undef 632 ret void 633 } 634 635 define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 { 636 ; HSA-LABEL: @kern_realign_i8_i8_i8_i8( 637 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 638 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 639 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 640 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 641 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 642 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 643 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 644 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 645 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 646 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 647 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 648 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 649 ; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 650 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 651 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 652 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 653 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 654 ; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 655 ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 656 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 657 ; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 658 ; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 659 ; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 660 ; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 661 ; HSA-NEXT: ret void 662 ; 663 ; MESA-LABEL: @kern_realign_i8_i8_i8_i8( 664 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 665 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 666 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 667 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 668 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 669 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 670 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 671 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 672 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 673 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 674 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 675 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 676 ; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 677 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 678 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 679 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 680 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 681 ; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 682 ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 683 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 684 ; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 685 ; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 686 ; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 687 ; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 688 ; MESA-NEXT: ret void 689 ; 690 store volatile i8 %arg0, i8 addrspace(1)* undef 691 store volatile i8 %arg1, i8 addrspace(1)* undef 692 store volatile i8 %arg2, i8 addrspace(1)* undef 693 store volatile i8 %arg3, i8 addrspace(1)* undef 694 ret void 695 } 696 697 define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 { 698 ; HSA-LABEL: @kern_realign_i8_v3i8( 699 ; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 700 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0 701 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 702 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 703 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 704 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4 705 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 706 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 707 ; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 708 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8> 709 ; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 710 ; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef 711 ; HSA-NEXT: ret void 712 ; 713 ; MESA-LABEL: @kern_realign_i8_v3i8( 714 ; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 715 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36 716 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 717 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 718 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 719 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40 720 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 721 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 722 ; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 723 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8> 724 ; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 725 ; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef 726 ; MESA-NEXT: ret void 727 ; 728 store volatile i8 %arg0, i8 addrspace(1)* undef 729 store volatile <3 x i8> %arg1, <3 x i8> addrspace(1)* undef 730 ret void 731 } 732 733 define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { 734 ; HSA-LABEL: @kern_realign_i8_i16( 735 ; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 736 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 737 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 738 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 739 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 740 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 741 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 742 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 743 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 744 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 745 ; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 746 ; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 747 ; HSA-NEXT: ret void 748 ; 749 ; MESA-LABEL: @kern_realign_i8_i16( 750 ; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 751 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 752 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 753 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 754 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 755 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 756 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 757 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 758 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 759 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 760 ; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 761 ; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 762 ; MESA-NEXT: ret void 763 ; 764 store volatile i8 %arg0, i8 addrspace(1)* undef 765 store volatile i16 %arg1, i16 addrspace(1)* undef 766 ret void 767 } 768 769 define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { 770 ; HSA-LABEL: @kern_realign_i1_i1( 771 ; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 772 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 773 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 774 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 775 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 776 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 777 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 778 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 779 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 780 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 781 ; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 782 ; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 783 ; HSA-NEXT: ret void 784 ; 785 ; MESA-LABEL: @kern_realign_i1_i1( 786 ; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 787 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 788 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 789 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 790 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 791 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 792 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 793 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 794 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 795 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 796 ; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 797 ; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 798 ; MESA-NEXT: ret void 799 ; 800 store volatile i1 %arg0, i1 addrspace(1)* undef 801 store volatile i1 %arg1, i1 addrspace(1)* undef 802 ret void 803 } 804 805 define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 { 806 ; HSA-LABEL: @kern_realign_i1_i1_i1( 807 ; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 808 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 809 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 810 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 811 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 812 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 813 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 814 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 815 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 816 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 817 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 818 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 819 ; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 820 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 821 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 822 ; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 823 ; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 824 ; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 825 ; HSA-NEXT: ret void 826 ; 827 ; MESA-LABEL: @kern_realign_i1_i1_i1( 828 ; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 829 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 830 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 831 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 832 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 833 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 834 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 835 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 836 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 837 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 838 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 839 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 840 ; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 841 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 842 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 843 ; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 844 ; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 845 ; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 846 ; MESA-NEXT: ret void 847 ; 848 store volatile i1 %arg0, i1 addrspace(1)* undef 849 store volatile i1 %arg1, i1 addrspace(1)* undef 850 store volatile i1 %arg2, i1 addrspace(1)* undef 851 ret void 852 } 853 854 define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 { 855 ; HSA-LABEL: @kern_realign_i1_i1_i1_i1( 856 ; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 857 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 858 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 859 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 860 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 861 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 862 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 863 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 864 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 865 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 866 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 867 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 868 ; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 869 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 870 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 871 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 872 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 873 ; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 874 ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 875 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 876 ; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 877 ; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 878 ; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 879 ; HSA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef 880 ; HSA-NEXT: ret void 881 ; 882 ; MESA-LABEL: @kern_realign_i1_i1_i1_i1( 883 ; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 884 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 885 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 886 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 887 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 888 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 889 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 890 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 891 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 892 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 893 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 894 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 895 ; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 896 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 897 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 898 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 899 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 900 ; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 901 ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 902 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 903 ; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 904 ; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 905 ; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 906 ; MESA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef 907 ; MESA-NEXT: ret void 908 ; 909 store volatile i1 %arg0, i1 addrspace(1)* undef 910 store volatile i1 %arg1, i1 addrspace(1)* undef 911 store volatile i1 %arg2, i1 addrspace(1)* undef 912 store volatile i1 %arg3, i1 addrspace(1)* undef 913 ret void 914 } 915 916 define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { 917 ; HSA-LABEL: @kern_realign_i1_v3i1( 918 ; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 919 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0 920 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 921 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 922 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 923 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 4 924 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 925 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 926 ; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3 927 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1> 928 ; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 929 ; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef 930 ; HSA-NEXT: ret void 931 ; 932 ; MESA-LABEL: @kern_realign_i1_v3i1( 933 ; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 934 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36 935 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 936 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 937 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 938 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 40 939 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 940 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 941 ; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3 942 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1> 943 ; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 944 ; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef 945 ; MESA-NEXT: ret void 946 ; 947 store volatile i1 %arg0, i1 addrspace(1)* undef 948 store volatile <3 x i1> %arg1, <3 x i1> addrspace(1)* undef 949 ret void 950 } 951 952 define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { 953 ; HSA-LABEL: @kern_realign_i1_i16( 954 ; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 955 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 956 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 957 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 958 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 959 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 960 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 961 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 962 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 963 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 964 ; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 965 ; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 966 ; HSA-NEXT: ret void 967 ; 968 ; MESA-LABEL: @kern_realign_i1_i16( 969 ; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 970 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 971 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 972 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 973 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 974 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 975 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 976 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 977 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 978 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 979 ; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 980 ; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 981 ; MESA-NEXT: ret void 982 ; 983 store volatile i1 %arg0, i1 addrspace(1)* undef 984 store volatile i16 %arg1, i16 addrspace(1)* undef 985 ret void 986 } 987 988 define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 { 989 ; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( 990 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 991 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 992 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 993 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 994 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 995 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 996 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 997 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 998 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 999 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 1000 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 1001 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1002 ; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1003 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 1004 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 1005 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 1006 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1007 ; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1008 ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 1009 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 1010 ; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 1011 ; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1012 ; HSA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1013 ; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8 1014 ; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8 1015 ; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 1016 ; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1017 ; HSA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1018 ; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16 1019 ; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 1020 ; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 1021 ; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1022 ; HSA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1023 ; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24 1024 ; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8 1025 ; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 1026 ; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 1027 ; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 1028 ; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 1029 ; HSA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef 1030 ; HSA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef 1031 ; HSA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef 1032 ; HSA-NEXT: ret void 1033 ; 1034 ; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( 1035 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1036 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1037 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1038 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1039 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 1040 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1041 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1042 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1043 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 1044 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 1045 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1046 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1047 ; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1048 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 1049 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 1050 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1051 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1052 ; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1053 ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 1054 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 1055 ; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 1056 ; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1057 ; MESA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 1058 ; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8 1059 ; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8 1060 ; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 1061 ; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1062 ; MESA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 1063 ; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16 1064 ; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 1065 ; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 1066 ; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1067 ; MESA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 1068 ; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24 1069 ; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8 1070 ; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 1071 ; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 1072 ; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 1073 ; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 1074 ; MESA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef 1075 ; MESA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef 1076 ; MESA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef 1077 ; MESA-NEXT: ret void 1078 ; 1079 store volatile i8 %arg0, i8 addrspace(1)* undef 1080 store volatile i8 %arg1, i8 addrspace(1)* undef 1081 store volatile i8 %arg2, i8 addrspace(1)* undef 1082 store volatile i8 %arg3, i8 addrspace(1)* undef 1083 store volatile i8 %arg5, i8 addrspace(1)* undef 1084 store volatile i8 %arg6, i8 addrspace(1)* undef 1085 store volatile i8 %arg7, i8 addrspace(1)* undef 1086 ret void 1087 } 1088 1089 define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 { 1090 ; HSA-LABEL: @kern_realign_f16_f16( 1091 ; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1092 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0 1093 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1094 ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1095 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 1096 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 1097 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0 1098 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1099 ; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1100 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 1101 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 1102 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half 1103 ; HSA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef 1104 ; HSA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef 1105 ; HSA-NEXT: ret void 1106 ; 1107 ; MESA-LABEL: @kern_realign_f16_f16( 1108 ; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1109 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36 1110 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1111 ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1112 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 1113 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 1114 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36 1115 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1116 ; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1117 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 1118 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 1119 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half 1120 ; MESA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef 1121 ; MESA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef 1122 ; MESA-NEXT: ret void 1123 ; 1124 store volatile half %arg0, half addrspace(1)* undef 1125 store volatile half %arg1, half addrspace(1)* undef 1126 ret void 1127 } 1128 1129 define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 { 1130 ; HSA-LABEL: @kern_global_ptr( 1131 ; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1132 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 1133 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1134 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1135 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1136 ; HSA-NEXT: ret void 1137 ; 1138 ; MESA-LABEL: @kern_global_ptr( 1139 ; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1140 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 1141 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1142 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1143 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1144 ; MESA-NEXT: ret void 1145 ; 1146 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1147 ret void 1148 } 1149 1150 define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 { 1151 ; HSA-LABEL: @kern_global_ptr_dereferencable( 1152 ; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1153 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0 1154 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1155 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1 1156 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1157 ; HSA-NEXT: ret void 1158 ; 1159 ; MESA-LABEL: @kern_global_ptr_dereferencable( 1160 ; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1161 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36 1162 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1163 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1 1164 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1165 ; MESA-NEXT: ret void 1166 ; 1167 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1168 ret void 1169 } 1170 1171 define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 { 1172 ; HSA-LABEL: @kern_global_ptr_dereferencable_or_null( 1173 ; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1174 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0 1175 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1176 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2 1177 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1178 ; HSA-NEXT: ret void 1179 ; 1180 ; MESA-LABEL: @kern_global_ptr_dereferencable_or_null( 1181 ; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1182 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36 1183 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1184 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2 1185 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1186 ; MESA-NEXT: ret void 1187 ; 1188 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1189 ret void 1190 } 1191 1192 define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 { 1193 ; HSA-LABEL: @kern_nonnull_global_ptr( 1194 ; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1195 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 1196 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1197 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0 1198 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1199 ; HSA-NEXT: ret void 1200 ; 1201 ; MESA-LABEL: @kern_nonnull_global_ptr( 1202 ; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1203 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 1204 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1205 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0 1206 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1207 ; MESA-NEXT: ret void 1208 ; 1209 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1210 ret void 1211 } 1212 1213 define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 { 1214 ; HSA-LABEL: @kern_align32_global_ptr( 1215 ; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1216 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 1217 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1218 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3 1219 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1220 ; HSA-NEXT: ret void 1221 ; 1222 ; MESA-LABEL: @kern_align32_global_ptr( 1223 ; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1224 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 1225 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1226 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3 1227 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1228 ; MESA-NEXT: ret void 1229 ; 1230 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1231 ret void 1232 } 1233 1234 define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 { 1235 ; HSA-LABEL: @kern_noalias_global_ptr( 1236 ; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1237 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef 1238 ; HSA-NEXT: ret void 1239 ; 1240 ; MESA-LABEL: @kern_noalias_global_ptr( 1241 ; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1242 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef 1243 ; MESA-NEXT: ret void 1244 ; 1245 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1246 ret void 1247 } 1248 1249 define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 { 1250 ; HSA-LABEL: @kern_noalias_global_ptr_x2( 1251 ; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1252 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef 1253 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef 1254 ; HSA-NEXT: ret void 1255 ; 1256 ; MESA-LABEL: @kern_noalias_global_ptr_x2( 1257 ; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1258 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef 1259 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef 1260 ; MESA-NEXT: ret void 1261 ; 1262 store volatile i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* undef 1263 store volatile i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* undef 1264 ret void 1265 } 1266 1267 define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 { 1268 ; HSA-LABEL: @struct_i8_i8_arg( 1269 ; HSA-NEXT: entry: 1270 ; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1271 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0 1272 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)* 1273 ; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1274 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0 1275 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1 1276 ; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1277 ; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1278 ; HSA-NEXT: ret void 1279 ; 1280 ; MESA-LABEL: @struct_i8_i8_arg( 1281 ; MESA-NEXT: entry: 1282 ; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1283 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36 1284 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)* 1285 ; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1286 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0 1287 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1 1288 ; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1289 ; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1290 ; MESA-NEXT: ret void 1291 ; 1292 entry: 1293 %elt0 = extractvalue {i8, i8} %in, 0 1294 %elt1 = extractvalue {i8, i8} %in, 1 1295 store volatile i8 %elt0, i8 addrspace(1)* null, align 4 1296 store volatile i8 %elt1, i8 addrspace(1)* null, align 4 1297 ret void 1298 } 1299 1300 define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 { 1301 ; HSA-LABEL: @struct_i8_i16_arg( 1302 ; HSA-NEXT: entry: 1303 ; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1304 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0 1305 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)* 1306 ; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1307 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0 1308 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1 1309 ; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1310 ; HSA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4 1311 ; HSA-NEXT: ret void 1312 ; 1313 ; MESA-LABEL: @struct_i8_i16_arg( 1314 ; MESA-NEXT: entry: 1315 ; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1316 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36 1317 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)* 1318 ; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1319 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0 1320 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1 1321 ; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1322 ; MESA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4 1323 ; MESA-NEXT: ret void 1324 ; 1325 entry: 1326 %elt0 = extractvalue {i8, i16} %in, 0 1327 %elt1 = extractvalue {i8, i16} %in, 1 1328 store volatile i8 %elt0, i8 addrspace(1)* null, align 4 1329 store volatile i16 %elt1, i16 addrspace(1)* null, align 4 1330 ret void 1331 } 1332 1333 define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 { 1334 ; HSA-LABEL: @array_2xi8_arg( 1335 ; HSA-NEXT: entry: 1336 ; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1337 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0 1338 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)* 1339 ; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1340 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0 1341 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1 1342 ; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1343 ; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1344 ; HSA-NEXT: ret void 1345 ; 1346 ; MESA-LABEL: @array_2xi8_arg( 1347 ; MESA-NEXT: entry: 1348 ; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1349 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36 1350 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)* 1351 ; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1352 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0 1353 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1 1354 ; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1355 ; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1356 ; MESA-NEXT: ret void 1357 ; 1358 entry: 1359 %elt0 = extractvalue [2 x i8] %in, 0 1360 %elt1 = extractvalue [2 x i8] %in, 1 1361 store volatile i8 %elt0, i8 addrspace(1)* null, align 4 1362 store volatile i8 %elt1, i8 addrspace(1)* null, align 4 1363 ret void 1364 } 1365 1366 define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 { 1367 ; HSA-LABEL: @array_2xi1_arg( 1368 ; HSA-NEXT: entry: 1369 ; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1370 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0 1371 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)* 1372 ; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1373 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0 1374 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1 1375 ; HSA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4 1376 ; HSA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4 1377 ; HSA-NEXT: ret void 1378 ; 1379 ; MESA-LABEL: @array_2xi1_arg( 1380 ; MESA-NEXT: entry: 1381 ; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1382 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36 1383 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)* 1384 ; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1385 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0 1386 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1 1387 ; MESA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4 1388 ; MESA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4 1389 ; MESA-NEXT: ret void 1390 ; 1391 entry: 1392 %elt0 = extractvalue [2 x i1] %in, 0 1393 %elt1 = extractvalue [2 x i1] %in, 1 1394 store volatile i1 %elt0, i1 addrspace(1)* null, align 4 1395 store volatile i1 %elt1, i1 addrspace(1)* null, align 4 1396 ret void 1397 } 1398 1399 define amdgpu_kernel void @only_empty_struct({} %empty) #0 { 1400 ; HSA-LABEL: @only_empty_struct( 1401 ; HSA-NEXT: ret void 1402 ; 1403 ; MESA-LABEL: @only_empty_struct( 1404 ; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1405 ; MESA-NEXT: ret void 1406 ; 1407 ret void 1408 } 1409 1410 define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 { 1411 ; HSA-LABEL: @empty_struct_with_other( 1412 ; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1413 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0 1414 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 1415 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1416 ; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 1417 ; HSA-NEXT: ret void 1418 ; 1419 ; MESA-LABEL: @empty_struct_with_other( 1420 ; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1421 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36 1422 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 1423 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1424 ; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 1425 ; MESA-NEXT: ret void 1426 ; 1427 store i32 %arg1, i32 addrspace(1)* undef 1428 ret void 1429 } 1430 1431 attributes #0 = { nounwind "target-cpu"="kaveri" } 1432 attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" } 1433 attributes #2 = { nounwind "target-cpu"="tahiti" } 1434 1435 ; GCN: 0 = !{} 1436 ; GCN: !1 = !{i64 42} 1437 ; GCN: !2 = !{i64 128} 1438 ; GCN: !3 = !{i64 1024} 1439