1 ; Verifies correctness of load/store of parameters and return values. 2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s 3 4 %s_i1 = type { i1 } 5 %s_i8 = type { i8 } 6 %s_i16 = type { i16 } 7 %s_f16 = type { half } 8 %s_i32 = type { i32 } 9 %s_f32 = type { float } 10 %s_i64 = type { i64 } 11 %s_f64 = type { double } 12 13 ; More complicated types. i64 is used to increase natural alignment 14 ; requirement for the type. 15 %s_i32x4 = type { i32, i32, i32, i32, i64} 16 %s_i32f32 = type { i32, float, i32, float, i64} 17 %s_i8i32x4 = type { i32, i32, i8, i32, i32, i64} 18 %s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}> 19 %s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]} 20 ; All scalar parameters must be at least 32 bits in size. 21 ; i1 is loaded/stored as i8. 22 23 ; CHECK: .func (.param .b32 func_retval0) 24 ; CHECK-LABEL: test_i1( 25 ; CHECK-NEXT: .param .b32 test_i1_param_0 26 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0]; 27 ; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1; 28 ; CHECK: setp.eq.b16 %p1, [[A]], 1 29 ; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]] 30 ; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1; 31 ; CHECK: .param .b32 param0; 32 ; CHECK: st.param.b32 [param0+0], [[C]] 33 ; CHECK: .param .b32 retval0; 34 ; CHECK: call.uni 35 ; CHECK-NEXT: test_i1, 36 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0]; 37 ; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1; 38 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 39 ; CHECK: ret; 40 define i1 @test_i1(i1 %a) { 41 %r = tail call i1 @test_i1(i1 %a); 42 ret i1 %r; 43 } 44 45 ; Signed i1 is a somewhat special case. We only care about one bit and 46 ; then use neg.s32 to convert it to 32-bit -1 if it's set. 
47 ; CHECK: .func (.param .b32 func_retval0) 48 ; CHECK-LABEL: test_i1s( 49 ; CHECK-NEXT: .param .b32 test_i1s_param_0 50 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0]; 51 ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]]; 52 ; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1; 53 ; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]]; 54 ; CHECK: .param .b32 param0; 55 ; CHECK: st.param.b32 [param0+0], [[A]]; 56 ; CHECK: .param .b32 retval0; 57 ; CHECK: call.uni 58 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0]; 59 ; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1; 60 ; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]]; 61 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 62 ; CHECK-NEXT: ret; 63 define signext i1 @test_i1s(i1 signext %a) { 64 %r = tail call signext i1 @test_i1s(i1 signext %a); 65 ret i1 %r; 66 } 67 68 ; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment. 69 ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) 70 ; CHECK-LABEL: test_v3i1( 71 ; CHECK-NEXT: .param .align 4 .b8 test_v3i1_param_0[4] 72 ; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2]; 73 ; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i1_param_0] 74 ; CHECK: .param .align 4 .b8 param0[4]; 75 ; CHECK-DAG: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]}; 76 ; CHECK-DAG: st.param.b8 [param0+2], [[E2]]; 77 ; CHECK: .param .align 4 .b8 retval0[4]; 78 ; CHECK: call.uni (retval0), 79 ; CHECK-NEXT: test_v3i1, 80 ; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0]; 81 ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; 82 ; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]} 83 ; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]]; 84 ; CHECK-NEXT: ret; 85 define <3 x i1> @test_v3i1(<3 x i1> %a) { 86 %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a); 87 ret <3 x i1> %r; 88 } 89 90 ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) 91 ; CHECK-LABEL: test_v4i1( 92 ; CHECK-NEXT: .param .align 4 .b8 
test_v4i1_param_0[4] 93 ; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i1_param_0] 94 ; CHECK: .param .align 4 .b8 param0[4]; 95 ; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 96 ; CHECK: .param .align 4 .b8 retval0[4]; 97 ; CHECK: call.uni (retval0), 98 ; CHECK: test_v4i1, 99 ; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; 100 ; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}; 101 ; CHECK-NEXT: ret; 102 define <4 x i1> @test_v4i1(<4 x i1> %a) { 103 %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a); 104 ret <4 x i1> %r; 105 } 106 107 ; CHECK: .func (.param .align 8 .b8 func_retval0[8]) 108 ; CHECK-LABEL: test_v5i1( 109 ; CHECK-NEXT: .param .align 8 .b8 test_v5i1_param_0[8] 110 ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4]; 111 ; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i1_param_0] 112 ; CHECK: .param .align 8 .b8 param0[8]; 113 ; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 114 ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; 115 ; CHECK: .param .align 8 .b8 retval0[8]; 116 ; CHECK: call.uni (retval0), 117 ; CHECK-NEXT: test_v5i1, 118 ; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; 119 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; 120 ; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} 121 ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]]; 122 ; CHECK-NEXT: ret; 123 define <5 x i1> @test_v5i1(<5 x i1> %a) { 124 %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a); 125 ret <5 x i1> %r; 126 } 127 128 ; Unsigned i8 is loaded into a 16-bit register, converted to 32 bits and masked with 255. 
129 ; CHECK: .func (.param .b32 func_retval0) 130 ; CHECK-LABEL: test_i8( 131 ; CHECK-NEXT: .param .b32 test_i8_param_0 132 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0]; 133 ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]]; 134 ; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255; 135 ; CHECK: .param .b32 param0; 136 ; CHECK: st.param.b32 [param0+0], [[A]]; 137 ; CHECK: .param .b32 retval0; 138 ; CHECK: call.uni (retval0), 139 ; CHECK: test_i8, 140 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0]; 141 ; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255; 142 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 143 ; CHECK-NEXT: ret; 144 define i8 @test_i8(i8 %a) { 145 %r = tail call i8 @test_i8(i8 %a); 146 ret i8 %r; 147 } 148 149 ; Signed i8 is loaded into a 16-bit register which is then sign-extended to i32. 150 ; CHECK: .func (.param .b32 func_retval0) 151 ; CHECK-LABEL: test_i8s( 152 ; CHECK-NEXT: .param .b32 test_i8s_param_0 153 ; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0]; 154 ; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]]; 155 ; CHECK: .param .b32 param0; 156 ; CHECK: st.param.b32 [param0+0], [[A]]; 157 ; CHECK: .param .b32 retval0; 158 ; CHECK: call.uni (retval0), 159 ; CHECK: test_i8s, 160 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0]; 161 ; NOTE: the conversion sequence checked next is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32? 
162 ; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]]; 163 ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]]; 164 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 165 ; CHECK-NEXT: ret; 166 define signext i8 @test_i8s(i8 signext %a) { 167 %r = tail call signext i8 @test_i8s(i8 signext %a); 168 ret i8 %r; 169 } 170 171 ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) 172 ; CHECK-LABEL: test_v3i8( 173 ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4] 174 ; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2]; 175 ; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0]; 176 ; CHECK: .param .align 4 .b8 param0[4]; 177 ; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]}; 178 ; CHECK: st.param.b8 [param0+2], [[E2]]; 179 ; CHECK: .param .align 4 .b8 retval0[4]; 180 ; CHECK: call.uni (retval0), 181 ; CHECK-NEXT: test_v3i8, 182 ; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0]; 183 ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; 184 ; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]}; 185 ; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]]; 186 ; CHECK-NEXT: ret; 187 define <3 x i8> @test_v3i8(<3 x i8> %a) { 188 %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a); 189 ret <3 x i8> %r; 190 } 191 192 ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) 193 ; CHECK-LABEL: test_v4i8( 194 ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4] 195 ; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0] 196 ; CHECK: .param .align 4 .b8 param0[4]; 197 ; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 198 ; CHECK: .param .align 4 .b8 retval0[4]; 199 ; CHECK: call.uni (retval0), 200 ; CHECK-NEXT: test_v4i8, 201 ; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; 202 ; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} 203 ; 
CHECK-NEXT: ret; 204 define <4 x i8> @test_v4i8(<4 x i8> %a) { 205 %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a); 206 ret <4 x i8> %r; 207 } 208 ; <5 x i8>: elements 0-3 are expected as one v4 access, element 4 as a scalar access at offset 4. 209 ; CHECK: .func (.param .align 8 .b8 func_retval0[8]) 210 ; CHECK-LABEL: test_v5i8( 211 ; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8] 212 ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4]; 213 ; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0] 214 ; CHECK: .param .align 8 .b8 param0[8]; 215 ; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 216 ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; 217 ; CHECK: .param .align 8 .b8 retval0[8]; 218 ; CHECK: call.uni (retval0), 219 ; CHECK-NEXT: test_v5i8, 220 ; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; 221 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; 222 ; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} 223 ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]]; 224 ; CHECK-NEXT: ret; 225 define <5 x i8> @test_v5i8(<5 x i8> %a) { 226 %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a); 227 ret <5 x i8> %r; 228 } 229 230 ; CHECK: .func (.param .b32 func_retval0) 231 ; CHECK-LABEL: test_i16( 232 ; CHECK-NEXT: .param .b32 test_i16_param_0 233 ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0]; 234 ; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]]; 235 ; CHECK: .param .b32 param0; 236 ; CHECK: st.param.b32 [param0+0], [[E32]]; 237 ; CHECK: .param .b32 retval0; 238 ; CHECK: call.uni (retval0), 239 ; CHECK-NEXT: test_i16, 240 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0]; 241 ; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535; 242 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 243 ; CHECK-NEXT: ret; 244 define i16 @test_i16(i16 %a) { 245 %r = tail call i16 @test_i16(i16 %a); 246 ret i16 %r; 247 } 248 249 ; CHECK: .func (.param .b32 func_retval0) 250 
; CHECK-LABEL: test_i16s( 251 ; CHECK-NEXT: .param .b32 test_i16s_param_0 252 ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0]; 253 ; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]]; 254 ; CHECK: .param .b32 param0; 255 ; CHECK: st.param.b32 [param0+0], [[E32]]; 256 ; CHECK: .param .b32 retval0; 257 ; CHECK: call.uni (retval0), 258 ; CHECK-NEXT: test_i16s, 259 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0]; 260 ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]]; 261 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 262 ; CHECK-NEXT: ret; 263 define signext i16 @test_i16s(i16 signext %a) { 264 %r = tail call signext i16 @test_i16s(i16 signext %a); 265 ret i16 %r; 266 } 267 268 ; CHECK: .func (.param .align 8 .b8 func_retval0[8]) 269 ; CHECK-LABEL: test_v3i16( 270 ; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8] 271 ; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4]; 272 ; CHECK-DAG: ld.param.v2.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0]; 273 ; CHECK: .param .align 8 .b8 param0[8]; 274 ; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]}; 275 ; CHECK: st.param.b16 [param0+4], [[E2]]; 276 ; CHECK: .param .align 8 .b8 retval0[8]; 277 ; CHECK: call.uni (retval0), 278 ; CHECK-NEXT: test_v3i16, 279 ; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0]; 280 ; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4]; 281 ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]}; 282 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]]; 283 ; CHECK-NEXT: ret; 284 define <3 x i16> @test_v3i16(<3 x i16> %a) { 285 %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a); 286 ret <3 x i16> %r; 287 } 288 289 ; CHECK: .func (.param .align 8 .b8 func_retval0[8]) 290 ; CHECK-LABEL: test_v4i16( 291 ; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8] 292 ; CHECK: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i16_param_0] 293 ; CHECK: .param .align 8 .b8 
param0[8]; 294 ; CHECK: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 295 ; CHECK: .param .align 8 .b8 retval0[8]; 296 ; CHECK: call.uni (retval0), 297 ; CHECK-NEXT: test_v4i16, 298 ; CHECK: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; 299 ; CHECK: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} 300 ; CHECK-NEXT: ret; 301 define <4 x i16> @test_v4i16(<4 x i16> %a) { 302 %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a); 303 ret <4 x i16> %r; 304 } 305 ; <5 x i16>: elements 0-3 are expected as one v4 access, element 4 as a scalar access at offset 8. 306 ; CHECK: .func (.param .align 16 .b8 func_retval0[16]) 307 ; CHECK-LABEL: test_v5i16( 308 ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16] 309 ; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8]; 310 ; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0] 311 ; CHECK: .param .align 16 .b8 param0[16]; 312 ; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 313 ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; 314 ; CHECK: .param .align 16 .b8 retval0[16]; 315 ; CHECK: call.uni (retval0), 316 ; CHECK-NEXT: test_v5i16, 317 ; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; 318 ; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8]; 319 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} 320 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]]; 321 ; CHECK-NEXT: ret; 322 define <5 x i16> @test_v5i16(<5 x i16> %a) { 323 %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a); 324 ret <5 x i16> %r; 325 } 326 327 ; CHECK: .func (.param .b32 func_retval0) 328 ; CHECK-LABEL: test_f16( 329 ; CHECK-NEXT: .param .b32 test_f16_param_0 330 ; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0]; 331 ; CHECK: .param .b32 param0; 332 ; CHECK: st.param.b16 [param0+0], [[E]]; 333 ; CHECK: .param .b32 retval0; 334 ; CHECK: 
call.uni (retval0), 335 ; CHECK-NEXT: test_f16, 336 ; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; 337 ; CHECK: st.param.b16 [func_retval0+0], [[R]] 338 ; CHECK-NEXT: ret; 339 define half @test_f16(half %a) { 340 %r = tail call half @test_f16(half %a); 341 ret half %r; 342 } 343 344 ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) 345 ; CHECK-LABEL: test_v2f16( 346 ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4] 347 ; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0]; 348 ; CHECK: .param .align 4 .b8 param0[4]; 349 ; CHECK: st.param.b32 [param0+0], [[E]]; 350 ; CHECK: .param .align 4 .b8 retval0[4]; 351 ; CHECK: call.uni (retval0), 352 ; CHECK-NEXT: test_v2f16, 353 ; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 354 ; CHECK: st.param.b32 [func_retval0+0], [[R]] 355 ; CHECK-NEXT: ret; 356 define <2 x half> @test_v2f16(<2 x half> %a) { 357 %r = tail call <2 x half> @test_v2f16(<2 x half> %a); 358 ret <2 x half> %r; 359 } 360 361 ; CHECK:.func (.param .align 8 .b8 func_retval0[8]) 362 ; CHECK-LABEL: test_v3f16( 363 ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8] 364 ; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0]; 365 ; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; 366 ; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4]; 367 ; CHECK: .param .align 8 .b8 param0[8]; 368 ; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]}; 369 ; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; 370 ; CHECK: .param .align 8 .b8 retval0[8]; 371 ; CHECK: call.uni (retval0), 372 ; CHECK: test_v3f16, 373 ; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0]; 374 ; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4]; 375 ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}; 376 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]]; 377 ; CHECK: ret; 378 define <3 x half> @test_v3f16(<3 x half> %a) { 379 %r = tail call <3 x half> @test_v3f16(<3 x half> %a); 380 ret <3 x half> %r; 
381 } 382 383 ; CHECK:.func (.param .align 8 .b8 func_retval0[8]) 384 ; CHECK-LABEL: test_v4f16( 385 ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8] 386 ; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0]; 387 ; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]]; 388 ; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]]; 389 ; CHECK: .param .align 8 .b8 param0[8]; 390 ; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]}; 391 ; CHECK: .param .align 8 .b8 retval0[8]; 392 ; CHECK: call.uni (retval0), 393 ; CHECK: test_v4f16, 394 ; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0]; 395 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]}; 396 ; CHECK: ret; 397 define <4 x half> @test_v4f16(<4 x half> %a) { 398 %r = tail call <4 x half> @test_v4f16(<4 x half> %a); 399 ret <4 x half> %r; 400 } 401 402 ; CHECK:.func (.param .align 16 .b8 func_retval0[16]) 403 ; CHECK-LABEL: test_v5f16( 404 ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] 405 ; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0]; 406 ; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; 407 ; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8]; 408 ; CHECK: .param .align 16 .b8 param0[16]; 409 ; CHECK-DAG: st.param.v4.b16 [param0+0], 410 ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; 411 ; CHECK: .param .align 16 .b8 retval0[16]; 412 ; CHECK: call.uni (retval0), 413 ; CHECK: test_v5f16, 414 ; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; 415 ; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8]; 416 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; 417 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]]; 418 ; CHECK: ret; 419 define <5 x half> @test_v5f16(<5 x half> %a) { 420 %r = tail call <5 x half> @test_v5f16(<5 x half> %a); 421 ret <5 x half> %r; 
422 } 423 424 ; CHECK:.func (.param .align 16 .b8 func_retval0[16]) 425 ; CHECK-LABEL: test_v8f16( 426 ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16] 427 ; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0]; 428 ; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]]; 429 ; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]]; 430 ; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]]; 431 ; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]]; 432 ; CHECK: .param .align 16 .b8 param0[16]; 433 ; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]}; 434 ; CHECK: .param .align 16 .b8 retval0[16]; 435 ; CHECK: call.uni (retval0), 436 ; CHECK: test_v8f16, 437 ; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0]; 438 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]}; 439 ; CHECK: ret; 440 define <8 x half> @test_v8f16(<8 x half> %a) { 441 %r = tail call <8 x half> @test_v8f16(<8 x half> %a); 442 ret <8 x half> %r; 443 } 444 445 ; CHECK:.func (.param .align 32 .b8 func_retval0[32]) 446 ; CHECK-LABEL: test_v9f16( 447 ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32] 448 ; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0]; 449 ; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8]; 450 ; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16]; 451 ; CHECK: .param .align 32 .b8 param0[32]; 452 ; CHECK-DAG: st.param.v4.b16 [param0+0], 453 ; CHECK-DAG: st.param.v4.b16 [param0+8], 454 ; CHECK-DAG: st.param.b16 [param0+16], [[E8]]; 455 ; CHECK: .param .align 32 .b8 retval0[32]; 456 ; CHECK: call.uni (retval0), 457 ; CHECK: test_v9f16, 458 ; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; 459 ; CHECK-DAG: 
ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8]; 460 ; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16]; 461 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; 462 ; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]}; 463 ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]]; 464 ; CHECK: ret; 465 define <9 x half> @test_v9f16(<9 x half> %a) { 466 %r = tail call <9 x half> @test_v9f16(<9 x half> %a); 467 ret <9 x half> %r; 468 } 469 470 ; CHECK: .func (.param .b32 func_retval0) 471 ; CHECK-LABEL: test_i32( 472 ; CHECK-NEXT: .param .b32 test_i32_param_0 473 ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0]; 474 ; CHECK: .param .b32 param0; 475 ; CHECK: st.param.b32 [param0+0], [[E]]; 476 ; CHECK: .param .b32 retval0; 477 ; CHECK: call.uni (retval0), 478 ; CHECK-NEXT: test_i32, 479 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; 480 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 481 ; CHECK-NEXT: ret; 482 define i32 @test_i32(i32 %a) { 483 %r = tail call i32 @test_i32(i32 %a); 484 ret i32 %r; 485 } 486 487 ; CHECK: .func (.param .align 16 .b8 func_retval0[16]) 488 ; CHECK-LABEL: test_v3i32( 489 ; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16] 490 ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8]; 491 ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0]; 492 ; CHECK: .param .align 16 .b8 param0[16]; 493 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; 494 ; CHECK: st.param.b32 [param0+8], [[E2]]; 495 ; CHECK: .param .align 16 .b8 retval0[16]; 496 ; CHECK: call.uni (retval0), 497 ; CHECK-NEXT: test_v3i32, 498 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; 499 ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; 500 ; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; 501 ; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]]; 502 ; CHECK-NEXT: ret; 
503 define <3 x i32> @test_v3i32(<3 x i32> %a) { 504 %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a); 505 ret <3 x i32> %r; 506 } 507 508 ; CHECK: .func (.param .align 16 .b8 func_retval0[16]) 509 ; CHECK-LABEL: test_v4i32( 510 ; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16] 511 ; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0] 512 ; CHECK: .param .align 16 .b8 param0[16]; 513 ; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 514 ; CHECK: .param .align 16 .b8 retval0[16]; 515 ; CHECK: call.uni (retval0), 516 ; CHECK-NEXT: test_v4i32, 517 ; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0]; 518 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} 519 ; CHECK-NEXT: ret; 520 define <4 x i32> @test_v4i32(<4 x i32> %a) { 521 %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a); 522 ret <4 x i32> %r; 523 } 524 ; <5 x i32>: elements 0-3 are expected as one v4 access, element 4 as a scalar access at offset 16. 525 ; CHECK: .func (.param .align 32 .b8 func_retval0[32]) 526 ; CHECK-LABEL: test_v5i32( 527 ; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32] 528 ; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16]; 529 ; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0] 530 ; CHECK: .param .align 32 .b8 param0[32]; 531 ; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; 532 ; CHECK-DAG: st.param.b32 [param0+16], [[E4]]; 533 ; CHECK: .param .align 32 .b8 retval0[32]; 534 ; CHECK: call.uni (retval0), 535 ; CHECK-NEXT: test_v5i32, 536 ; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0]; 537 ; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16]; 538 ; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} 539 ; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]]; 540 ; CHECK-NEXT: ret; 541 define <5 x 
i32> @test_v5i32(<5 x i32> %a) { 542 %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a); 543 ret <5 x i32> %r; 544 } 545 546 ; CHECK: .func (.param .b32 func_retval0) 547 ; CHECK-LABEL: test_f32( 548 ; CHECK-NEXT: .param .b32 test_f32_param_0 549 ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0]; 550 ; CHECK: .param .b32 param0; 551 ; CHECK: st.param.f32 [param0+0], [[E]]; 552 ; CHECK: .param .b32 retval0; 553 ; CHECK: call.uni (retval0), 554 ; CHECK-NEXT: test_f32, 555 ; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0]; 556 ; CHECK: st.param.f32 [func_retval0+0], [[R]]; 557 ; CHECK-NEXT: ret; 558 define float @test_f32(float %a) { 559 %r = tail call float @test_f32(float %a); 560 ret float %r; 561 } 562 563 ; CHECK: .func (.param .b64 func_retval0) 564 ; CHECK-LABEL: test_i64( 565 ; CHECK-NEXT: .param .b64 test_i64_param_0 566 ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0]; 567 ; CHECK: .param .b64 param0; 568 ; CHECK: st.param.b64 [param0+0], [[E]]; 569 ; CHECK: .param .b64 retval0; 570 ; CHECK: call.uni (retval0), 571 ; CHECK-NEXT: test_i64, 572 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0]; 573 ; CHECK: st.param.b64 [func_retval0+0], [[R]]; 574 ; CHECK-NEXT: ret; 575 define i64 @test_i64(i64 %a) { 576 %r = tail call i64 @test_i64(i64 %a); 577 ret i64 %r; 578 } 579 580 ; CHECK: .func (.param .align 32 .b8 func_retval0[32]) 581 ; CHECK-LABEL: test_v3i64( 582 ; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32] 583 ; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16]; 584 ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0]; 585 ; CHECK: .param .align 32 .b8 param0[32]; 586 ; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]}; 587 ; CHECK: st.param.b64 [param0+16], [[E2]]; 588 ; CHECK: .param .align 32 .b8 retval0[32]; 589 ; CHECK: call.uni (retval0), 590 ; CHECK-NEXT: test_v3i64, 591 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0]; 592 ; CHECK: ld.param.b64 
[[RE2:%rd[0-9]+]], [retval0+16]; 593 ; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]}; 594 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]]; 597 ; CHECK-NEXT: ret; 598 define <3 x i64> @test_v3i64(<3 x i64> %a) { 599 %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a); 600 ret <3 x i64> %r; 601 } 602 603 ; For i64 vector loads are limited by PTX to 2 elements. 604 ; CHECK: .func (.param .align 32 .b8 func_retval0[32]) 605 ; CHECK-LABEL: test_v4i64( 606 ; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32] 607 ; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16]; 608 ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0]; 609 ; CHECK: .param .align 32 .b8 param0[32]; 610 ; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]}; 611 ; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]}; 612 ; CHECK: .param .align 32 .b8 retval0[32]; 613 ; CHECK: call.uni (retval0), 614 ; CHECK-NEXT: test_v4i64, 615 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0]; 616 ; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16]; 617 ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]}; 618 ; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]}; 619 ; CHECK-NEXT: ret; 620 define <4 x i64> @test_v4i64(<4 x i64> %a) { 621 %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a); 622 ret <4 x i64> %r; 623 } 624 625 ; Aggregates, on the other hand, do not get extended. 
626 ; A struct wrapping a single i1 is expected as a 1-byte, align-1 .param array. 627 ; CHECK: .func (.param .align 1 .b8 func_retval0[1]) 628 ; CHECK-LABEL: test_s_i1( 629 ; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1] 630 ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0]; 631 ; CHECK: .param .align 1 .b8 param0[1]; 632 ; CHECK: st.param.b8 [param0+0], [[A]] 633 ; CHECK: .param .align 1 .b8 retval0[1]; 634 ; CHECK: call.uni 635 ; CHECK-NEXT: test_s_i1, 636 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0]; 637 ; CHECK: st.param.b8 [func_retval0+0], [[R]]; 638 ; CHECK-NEXT: ret; 639 define %s_i1 @test_s_i1(%s_i1 %a) { 640 %r = tail call %s_i1 @test_s_i1(%s_i1 %a); 641 ret %s_i1 %r; 642 } 643 644 ; CHECK: .func (.param .align 1 .b8 func_retval0[1]) 645 ; CHECK-LABEL: test_s_i8( 646 ; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1] 647 ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0]; 648 ; CHECK: .param .align 1 .b8 param0[1]; 649 ; CHECK: st.param.b8 [param0+0], [[A]] 650 ; CHECK: .param .align 1 .b8 retval0[1]; 651 ; CHECK: call.uni 652 ; CHECK-NEXT: test_s_i8, 653 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0]; 654 ; CHECK: st.param.b8 [func_retval0+0], [[R]]; 655 ; CHECK-NEXT: ret; 656 define %s_i8 @test_s_i8(%s_i8 %a) { 657 %r = tail call %s_i8 @test_s_i8(%s_i8 %a); 658 ret %s_i8 %r; 659 } 660 661 ; CHECK: .func (.param .align 2 .b8 func_retval0[2]) 662 ; CHECK-LABEL: test_s_i16( 663 ; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2] 664 ; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0]; 665 ; CHECK: .param .align 2 .b8 param0[2]; 666 ; CHECK: st.param.b16 [param0+0], [[A]] 667 ; CHECK: .param .align 2 .b8 retval0[2]; 668 ; CHECK: call.uni 669 ; CHECK-NEXT: test_s_i16, 670 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; 671 ; CHECK: st.param.b16 [func_retval0+0], [[R]]; 672 ; CHECK-NEXT: ret; 673 define %s_i16 @test_s_i16(%s_i16 %a) { 674 %r = tail call %s_i16 @test_s_i16(%s_i16 %a); 675 ret %s_i16 %r; 676 } 677 678 ; CHECK: .func (.param .align 2 .b8 func_retval0[2]) 679 ; CHECK-LABEL: 
test_s_f16( 680 ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2] 681 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0]; 682 ; CHECK: .param .align 2 .b8 param0[2]; 683 ; CHECK: st.param.b16 [param0+0], [[A]] 684 ; CHECK: .param .align 2 .b8 retval0[2]; 685 ; CHECK: call.uni 686 ; CHECK-NEXT: test_s_f16, 687 ; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; 688 ; CHECK: st.param.b16 [func_retval0+0], [[R]]; 689 ; CHECK-NEXT: ret; 690 define %s_f16 @test_s_f16(%s_f16 %a) { 691 %r = tail call %s_f16 @test_s_f16(%s_f16 %a); 692 ret %s_f16 %r; 693 } 694 695 ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) 696 ; CHECK-LABEL: test_s_i32( 697 ; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4] 698 ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0]; 699 ; CHECK: .param .align 4 .b8 param0[4] 700 ; CHECK: st.param.b32 [param0+0], [[E]]; 701 ; CHECK: .param .align 4 .b8 retval0[4]; 702 ; CHECK: call.uni (retval0), 703 ; CHECK-NEXT: test_s_i32, 704 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; 705 ; CHECK: st.param.b32 [func_retval0+0], [[R]]; 706 ; CHECK-NEXT: ret; 707 define %s_i32 @test_s_i32(%s_i32 %a) { 708 %r = tail call %s_i32 @test_s_i32(%s_i32 %a); 709 ret %s_i32 %r; 710 } 711 712 ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) 713 ; CHECK-LABEL: test_s_f32( 714 ; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4] 715 ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0]; 716 ; CHECK: .param .align 4 .b8 param0[4] 717 ; CHECK: st.param.f32 [param0+0], [[E]]; 718 ; CHECK: .param .align 4 .b8 retval0[4]; 719 ; CHECK: call.uni (retval0), 720 ; CHECK-NEXT: test_s_f32, 721 ; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0]; 722 ; CHECK: st.param.f32 [func_retval0+0], [[R]]; 723 ; CHECK-NEXT: ret; 724 define %s_f32 @test_s_f32(%s_f32 %a) { 725 %r = tail call %s_f32 @test_s_f32(%s_f32 %a); 726 ret %s_f32 %r; 727 } 728 729 ; CHECK: .func (.param .align 8 .b8 func_retval0[8]) 730 ; CHECK-LABEL: test_s_i64( 731 ; CHECK-NEXT: 
.param .align 8 .b8 test_s_i64_param_0[8] 732 ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0]; 733 ; CHECK: .param .align 8 .b8 param0[8]; 734 ; CHECK: st.param.b64 [param0+0], [[E]]; 735 ; CHECK: .param .align 8 .b8 retval0[8]; 736 ; CHECK: call.uni (retval0), 737 ; CHECK-NEXT: test_s_i64, 738 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0]; 739 ; CHECK: st.param.b64 [func_retval0+0], [[R]]; 740 ; CHECK-NEXT: ret; 741 define %s_i64 @test_s_i64(%s_i64 %a) { 742 %r = tail call %s_i64 @test_s_i64(%s_i64 %a); 743 ret %s_i64 %r; 744 } 745 746 ; Fields that have different types, but identical sizes are not vectorized. 747 ; CHECK: .func (.param .align 8 .b8 func_retval0[24]) 748 ; CHECK-LABEL: test_s_i32f32( 749 ; CHECK: .param .align 8 .b8 test_s_i32f32_param_0[24] 750 ; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16]; 751 ; CHECK-DAG: ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12]; 752 ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8]; 753 ; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4]; 754 ; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0]; 755 ; CHECK: .param .align 8 .b8 param0[24]; 756 ; CHECK-DAG: st.param.b32 [param0+0], [[E0]]; 757 ; CHECK-DAG: st.param.f32 [param0+4], [[E1]]; 758 ; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; 759 ; CHECK-DAG: st.param.f32 [param0+12], [[E3]]; 760 ; CHECK-DAG: st.param.b64 [param0+16], [[E4]]; 761 ; CHECK: .param .align 8 .b8 retval0[24]; 762 ; CHECK: call.uni (retval0), 763 ; CHECK-NEXT: test_s_i32f32, 764 ; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0]; 765 ; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4]; 766 ; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; 767 ; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12]; 768 ; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16]; 769 ; CHECK-DAG: st.param.b32 [func_retval0+0], [[RE0]]; 770 ; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]]; 771 ; 
CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]]; 772 ; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]]; 773 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]]; 774 ; CHECK: ret; 775 define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) { 776 %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a); 777 ret %s_i32f32 %r; 778 } 779 780 ; We do vectorize consecutive fields with matching types. 781 ; CHECK:.visible .func (.param .align 8 .b8 func_retval0[24]) 782 ; CHECK-LABEL: test_s_i32x4( 783 ; CHECK: .param .align 8 .b8 test_s_i32x4_param_0[24] 784 ; CHECK-DAG: ld.param.u64 [[RD1:%rd[0-9]+]], [test_s_i32x4_param_0+16]; 785 ; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8]; 786 ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0]; 787 ; CHECK: .param .align 8 .b8 param0[24]; 788 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; 789 ; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]}; 790 ; CHECK: st.param.b64 [param0+16], [[E4]]; 791 ; CHECK: .param .align 8 .b8 retval0[24]; 792 ; CHECK: call.uni (retval0), 793 ; CHECK-NEXT: test_s_i32x4, 794 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; 795 ; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8]; 796 ; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16]; 797 ; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; 798 ; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]}; 799 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]]; 800 ; CHECK: ret; 801 802 define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) { 803 %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a); 804 ret %s_i32x4 %r; 805 } 806 807 ; CHECK:.visible .func (.param .align 8 .b8 func_retval0[32]) 808 ; CHECK-LABEL: test_s_i1i32x4( 809 ; CHECK: .param .align 8 .b8 test_s_i1i32x4_param_0[32] 810 ; CHECK: ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24]; 811 ; CHECK: ld.param.u32 [[E4:%r[0-9]+]], 
[test_s_i1i32x4_param_0+16]; 812 ; CHECK: ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12]; 813 ; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8]; 814 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0]; 815 ; CHECK: .param .align 8 .b8 param0[32]; 816 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; 817 ; CHECK: st.param.b8 [param0+8], [[E2]]; 818 ; CHECK: st.param.b32 [param0+12], [[E3]]; 819 ; CHECK: st.param.b32 [param0+16], [[E4]]; 820 ; CHECK: st.param.b64 [param0+24], [[E5]]; 821 ; CHECK: .param .align 8 .b8 retval0[32]; 822 ; CHECK: call.uni (retval0), 823 ; CHECK: test_s_i1i32x4, 824 ; CHECK: ( 825 ; CHECK: param0 826 ; CHECK: ); 827 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; 828 ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8]; 829 ; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12]; 830 ; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16]; 831 ; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24]; 832 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; 833 ; CHECK: st.param.b8 [func_retval0+8], [[RE2]]; 834 ; CHECK: st.param.b32 [func_retval0+12], [[RE3]]; 835 ; CHECK: st.param.b32 [func_retval0+16], [[RE4]]; 836 ; CHECK: st.param.b64 [func_retval0+24], [[RE5]]; 837 ; CHECK: ret; 838 839 define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { 840 %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a); 841 ret %s_i8i32x4 %r; 842 } 843 844 ; -- All loads/stores from parameters aligned by one must be done one 845 ; -- byte at a time. 
846 ; CHECK:.visible .func (.param .align 1 .b8 func_retval0[25]) 847 ; CHECK-LABEL: test_s_i1i32x4p( 848 ; CHECK-DAG: .param .align 1 .b8 test_s_i1i32x4p_param_0[25] 849 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+24]; 850 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+23]; 851 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+22]; 852 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+21]; 853 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+20]; 854 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+19]; 855 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+18]; 856 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+17]; 857 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+16]; 858 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+15]; 859 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+14]; 860 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+13]; 861 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+12]; 862 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+11]; 863 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+10]; 864 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+9]; 865 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+8]; 866 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+7]; 867 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+6]; 868 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+5]; 869 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+4]; 870 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+3]; 871 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+2]; 872 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1]; 873 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0]; 874 ; --- TODO 875 ; --- Unaligned parameter store/ return value load is broken in both nvcc 876 ; --- and llvm and 
needs to be fixed. 877 ; CHECK: .param .align 1 .b8 param0[25]; 878 ; CHECK-DAG: st.param.b32 [param0+0], 879 ; CHECK-DAG: st.param.b32 [param0+4], 880 ; CHECK-DAG: st.param.b8 [param0+8], 881 ; CHECK-DAG: st.param.b32 [param0+9], 882 ; CHECK-DAG: st.param.b32 [param0+13], 883 ; CHECK-DAG: st.param.b64 [param0+17], 884 ; CHECK: .param .align 1 .b8 retval0[25]; 885 ; CHECK: call.uni (retval0), 886 ; CHECK-NEXT: test_s_i1i32x4p, 887 ; CHECK-DAG: ld.param.b32 %r41, [retval0+0]; 888 ; CHECK-DAG: ld.param.b32 %r42, [retval0+4]; 889 ; CHECK-DAG: ld.param.b8 %rs2, [retval0+8]; 890 ; CHECK-DAG: ld.param.b32 %r43, [retval0+9]; 891 ; CHECK-DAG: ld.param.b32 %r44, [retval0+13]; 892 ; CHECK-DAG: ld.param.b64 %rd23, [retval0+17]; 893 ; CHECK-DAG: st.param.b32 [func_retval0+0], 894 ; CHECK-DAG: st.param.b32 [func_retval0+4], 895 ; CHECK-DAG: st.param.b8 [func_retval0+8], 896 ; CHECK-DAG: st.param.b32 [func_retval0+9], 897 ; CHECK-DAG: st.param.b32 [func_retval0+13], 898 ; CHECK-DAG: st.param.b64 [func_retval0+17], 899 900 define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) { 901 %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a); 902 ret %s_i8i32x4p %r; 903 } 904 905 ; Check that we can vectorize loads that span multiple aggregate fields. 
906 ; CHECK:.visible .func (.param .align 16 .b8 func_retval0[80]) 907 ; CHECK-LABEL: test_s_crossfield( 908 ; CHECK: .param .align 16 .b8 test_s_crossfield_param_0[80] 909 ; CHECK: ld.param.u32 [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64]; 910 ; CHECK: ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48]; 911 ; CHECK: ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32]; 912 ; CHECK: ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16]; 913 ; CHECK: ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8]; 914 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0]; 915 ; CHECK: .param .align 16 .b8 param0[80]; 916 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; 917 ; CHECK: st.param.b32 [param0+8], [[E2]]; 918 ; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]}; 919 ; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]}; 920 ; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]}; 921 ; CHECK: st.param.b32 [param0+64], [[E15]]; 922 ; CHECK: .param .align 16 .b8 retval0[80]; 923 ; CHECK: call.uni (retval0), 924 ; CHECK: test_s_crossfield, 925 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; 926 ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; 927 ; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16]; 928 ; CHECK: ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32]; 929 ; CHECK: ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48]; 930 ; CHECK: ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64]; 931 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; 932 ; CHECK: st.param.b32 
[func_retval0+8], [[RE2]]; 933 ; CHECK: st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]}; 934 ; CHECK: st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]}; 935 ; CHECK: st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]}; 936 ; CHECK: st.param.b32 [func_retval0+64], [[RE15]]; 937 ; CHECK: ret; 938 939 define %s_crossfield @test_s_crossfield(%s_crossfield %a) { 940 %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a); 941 ret %s_crossfield %r; 942 } 943