; RUN: llc < %s -march=ptx32 | FileCheck %s

; Load-instruction selection test for the 32-bit PTX target.
;
; The module is compiled with llc (-march=ptx32) and the emitted assembly is
; matched by FileCheck against the directives embedded in the comments below.
;
; Maintenance note: explanatory comments in this file must never spell a lit
; or FileCheck directive keyword followed by a colon, otherwise the tools will
; treat the comment as a directive.
;
; Register naming expected by the patterns below: `rh` for i16 values, `r` for
; i32 and float values (and for all addresses), `rd` for i64 and double values.

; ----------------------------------------------------------------------------
; External array declarations
;
; Ten-element arrays of i16, i32, i64, float and double. Each element type is
; declared four times: in the default address space and in address spaces 1,
; 2 and 4. The paired patterns expect the .global, .const, .local and .shared
; state spaces respectively, and a .b8 byte array whose size is ten times the
; element size (20, 40 or 80 bytes).
; ----------------------------------------------------------------------------

;CHECK: .extern .global .b8 array_i16[20];
@array_i16 = external global [10 x i16]

;CHECK: .extern .const .b8 array_constant_i16[20];
@array_constant_i16 = external addrspace(1) constant [10 x i16]

;CHECK: .extern .local .b8 array_local_i16[20];
@array_local_i16 = external addrspace(2) global [10 x i16]

;CHECK: .extern .shared .b8 array_shared_i16[20];
@array_shared_i16 = external addrspace(4) global [10 x i16]

;CHECK: .extern .global .b8 array_i32[40];
@array_i32 = external global [10 x i32]

;CHECK: .extern .const .b8 array_constant_i32[40];
@array_constant_i32 = external addrspace(1) constant [10 x i32]

;CHECK: .extern .local .b8 array_local_i32[40];
@array_local_i32 = external addrspace(2) global [10 x i32]

;CHECK: .extern .shared .b8 array_shared_i32[40];
@array_shared_i32 = external addrspace(4) global [10 x i32]

;CHECK: .extern .global .b8 array_i64[80];
@array_i64 = external global [10 x i64]

;CHECK: .extern .const .b8 array_constant_i64[80];
@array_constant_i64 = external addrspace(1) constant [10 x i64]

;CHECK: .extern .local .b8 array_local_i64[80];
@array_local_i64 = external addrspace(2) global [10 x i64]

;CHECK: .extern .shared .b8 array_shared_i64[80];
@array_shared_i64 = external addrspace(4) global [10 x i64]

;CHECK: .extern .global .b8 array_float[40];
@array_float = external global [10 x float]

;CHECK: .extern .const .b8 array_constant_float[40];
@array_constant_float = external addrspace(1) constant [10 x float]

;CHECK: .extern .local .b8 array_local_float[40];
@array_local_float = external addrspace(2) global [10 x float]

;CHECK: .extern .shared .b8 array_shared_float[40];
@array_shared_float = external addrspace(4) global [10 x float]

;CHECK: .extern .global .b8 array_double[80];
@array_double = external global [10 x double]

;CHECK: .extern .const .b8 array_constant_double[80];
@array_constant_double = external addrspace(1) constant [10 x double]

;CHECK: .extern .local .b8 array_local_double[80];
@array_local_double = external addrspace(2) global [10 x double]

;CHECK: .extern .shared .b8 array_shared_double[80];
@array_shared_double = external addrspace(4) global [10 x double]


; ----------------------------------------------------------------------------
; t1_*: load directly through the pointer argument.
;
; Expected output: one ld.global of the matching type whose address operand is
; a bare register, immediately followed by ret.
; ----------------------------------------------------------------------------

define ptx_device i16 @t1_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
  %x = load i16* %p
  ret i16 %x
}

define ptx_device i32 @t1_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
  %x = load i32* %p
  ret i32 %x
}

define ptx_device i64 @t1_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
  %x = load i64* %p
  ret i64 %x
}

define ptx_device float @t1_f32(float* %p) {
entry:
;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
  %x = load float* %p
  ret float %x
}

define ptx_device double @t1_f64(double* %p) {
entry:
;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
  %x = load double* %p
  ret double %x
}

; ----------------------------------------------------------------------------
; t2_*: load element 1 relative to the pointer argument.
;
; Expected output: the constant index appears in the address operand as an
; immediate byte offset equal to the element size (+2, +4 or +8), and ret
; follows the load directly.
; ----------------------------------------------------------------------------

define ptx_device i16 @t2_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
;CHECK-NEXT: ret;
  %i = getelementptr i16* %p, i32 1
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t2_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
;CHECK-NEXT: ret;
  %i = getelementptr i32* %p, i32 1
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t2_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
;CHECK-NEXT: ret;
  %i = getelementptr i64* %p, i32 1
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t2_f32(float* %p) {
entry:
;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
;CHECK-NEXT: ret;
  %i = getelementptr float* %p, i32 1
  %x = load float* %i
  ret float %x
}

define ptx_device double @t2_f64(double* %p) {
entry:
;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
;CHECK-NEXT: ret;
  %i = getelementptr double* %p, i32 1
  %x = load double* %i
  ret double %x
}

; ----------------------------------------------------------------------------
; t3_*: load at a run-time index %q relative to the pointer argument.
;
; Expected output: shl.b32 scales the index by the element size (shift of 1,
; 2 or 3), add.u32 writes base + scaled index back into the register captured
; as R0, and the load uses R0 as its address.
;
; NOTE(review): unlike the other groups, the ret after the load is not pinned
; by these patterns - confirm whether that is intentional.
; ----------------------------------------------------------------------------

define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
entry:
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
  %i = getelementptr i16* %p, i32 %q
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
entry:
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
  %i = getelementptr i32* %p, i32 %q
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
entry:
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
  %i = getelementptr i64* %p, i32 %q
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t3_f32(float* %p, i32 %q) {
entry:
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
  %i = getelementptr float* %p, i32 %q
  %x = load float* %i
  ret float %x
}

define ptx_device double @t3_f64(double* %p, i32 %q) {
entry:
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
  %i = getelementptr double* %p, i32 %q
  %x = load double* %i
  ret double %x
}

; ----------------------------------------------------------------------------
; t4_*: load element 0 of each external array, once per address space.
;
; Expected output: mov.u32 places the array symbol in the register captured as
; R0, the load reads through R0 with the state-space qualifier that matches
; the array's address space, and ret follows directly.
; ----------------------------------------------------------------------------

; Default address space: the patterns expect ld.global.

define ptx_device i16 @t4_global_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t4_global_u32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t4_global_u64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t4_global_f32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
  %x = load float* %i
  ret float %x
}

define ptx_device double @t4_global_f64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
  %x = load double* %i
  ret double %x
}

; Address space 1: the patterns expect ld.const.

define ptx_device i16 @t4_const_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16;
;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
  %x = load i16 addrspace(1)* %i
  ret i16 %x
}

define ptx_device i32 @t4_const_u32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32;
;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
  %x = load i32 addrspace(1)* %i
  ret i32 %x
}

define ptx_device i64 @t4_const_u64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64;
;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
  %x = load i64 addrspace(1)* %i
  ret i64 %x
}

define ptx_device float @t4_const_f32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float;
;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
  %x = load float addrspace(1)* %i
  ret float %x
}

define ptx_device double @t4_const_f64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double;
;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
  %x = load double addrspace(1)* %i
  ret double %x
}

; Address space 2: the patterns expect ld.local.

define ptx_device i16 @t4_local_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
  %x = load i16 addrspace(2)* %i
  ret i16 %x
}

define ptx_device i32 @t4_local_u32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
  %x = load i32 addrspace(2)* %i
  ret i32 %x
}

define ptx_device i64 @t4_local_u64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
  %x = load i64 addrspace(2)* %i
  ret i64 %x
}

define ptx_device float @t4_local_f32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
  %x = load float addrspace(2)* %i
  ret float %x
}

define ptx_device double @t4_local_f64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
  %x = load double addrspace(2)* %i
  ret double %x
}

; Address space 4: the patterns expect ld.shared.

define ptx_device i16 @t4_shared_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
  %x = load i16 addrspace(4)* %i
  ret i16 %x
}

define ptx_device i32 @t4_shared_u32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
  %x = load i32 addrspace(4)* %i
  ret i32 %x
}

define ptx_device i64 @t4_shared_u64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
  %x = load i64 addrspace(4)* %i
  ret i64 %x
}

define ptx_device float @t4_shared_f32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
  %x = load float addrspace(4)* %i
  ret float %x
}

define ptx_device double @t4_shared_f64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
  %x = load double addrspace(4)* %i
  ret double %x
}

; ----------------------------------------------------------------------------
; t5_*: load element 1 of each default-address-space external array.
;
; Expected output: mov.u32 places the array symbol in the register captured as
; R0, the load addresses R0 plus an immediate byte offset equal to the element
; size (+2, +4 or +8), and ret follows directly.
; ----------------------------------------------------------------------------

define ptx_device i16 @t5_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t5_u32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t5_u64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t5_f32() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
  %x = load float* %i
  ret float %x
}

define ptx_device double @t5_f64() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8];
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
  %x = load double* %i
  ret double %x
}