Home | History | Annotate | Download | only in PTX
      1 ; RUN: llc < %s -march=ptx32 | FileCheck %s
      2 
      3 ;CHECK: .extern .global .b8 array_i16[20];
      4 @array_i16 = external global [10 x i16]  ; i16 arrays, one per address space; no addrspace qualifier is expected as a 20-byte (10 x 2) .global extern
      5 
      6 ;CHECK: .extern .const .b8 array_constant_i16[20];
      7 @array_constant_i16 = external addrspace(1) constant [10 x i16]  ; addrspace(1) is expected in the .const state space
      8 
      9 ;CHECK: .extern .local .b8 array_local_i16[20];
     10 @array_local_i16 = external addrspace(2) global [10 x i16]  ; addrspace(2) is expected in the .local state space
     11 
     12 ;CHECK: .extern .shared .b8 array_shared_i16[20];
     13 @array_shared_i16 = external addrspace(4) global [10 x i16]  ; addrspace(4) is expected in the .shared state space
     14 
     15 ;CHECK: .extern .global .b8 array_i32[40];
     16 @array_i32 = external global [10 x i32]  ; i32 arrays, one per address space; no addrspace qualifier is expected as a 40-byte (10 x 4) .global extern
     17 
     18 ;CHECK: .extern .const .b8 array_constant_i32[40];
     19 @array_constant_i32 = external addrspace(1) constant [10 x i32]  ; addrspace(1) is expected in the .const state space
     20 
     21 ;CHECK: .extern .local .b8 array_local_i32[40];
     22 @array_local_i32 = external addrspace(2) global [10 x i32]  ; addrspace(2) is expected in the .local state space
     23 
     24 ;CHECK: .extern .shared .b8 array_shared_i32[40];
     25 @array_shared_i32 = external addrspace(4) global [10 x i32]  ; addrspace(4) is expected in the .shared state space
     26 
     27 ;CHECK: .extern .global .b8 array_i64[80];
     28 @array_i64 = external global [10 x i64]  ; i64 arrays, one per address space; no addrspace qualifier is expected as an 80-byte (10 x 8) .global extern
     29 
     30 ;CHECK: .extern .const .b8 array_constant_i64[80];
     31 @array_constant_i64 = external addrspace(1) constant [10 x i64]  ; addrspace(1) is expected in the .const state space
     32 
     33 ;CHECK: .extern .local .b8 array_local_i64[80];
     34 @array_local_i64 = external addrspace(2) global [10 x i64]  ; addrspace(2) is expected in the .local state space
     35 
     36 ;CHECK: .extern .shared .b8 array_shared_i64[80];
     37 @array_shared_i64 = external addrspace(4) global [10 x i64]  ; addrspace(4) is expected in the .shared state space
     38 
     39 ;CHECK: .extern .global .b8 array_float[40];
     40 @array_float = external global [10 x float]  ; float arrays, one per address space; no addrspace qualifier is expected as a 40-byte (10 x 4) .global extern
     41 
     42 ;CHECK: .extern .const .b8 array_constant_float[40];
     43 @array_constant_float = external addrspace(1) constant [10 x float]  ; addrspace(1) is expected in the .const state space
     44 
     45 ;CHECK: .extern .local .b8 array_local_float[40];
     46 @array_local_float = external addrspace(2) global [10 x float]  ; addrspace(2) is expected in the .local state space
     47 
     48 ;CHECK: .extern .shared .b8 array_shared_float[40];
     49 @array_shared_float = external addrspace(4) global [10 x float]  ; addrspace(4) is expected in the .shared state space
     50 
     51 ;CHECK: .extern .global .b8 array_double[80];
     52 @array_double = external global [10 x double]  ; double arrays, one per address space; no addrspace qualifier is expected as an 80-byte (10 x 8) .global extern
     53 
     54 ;CHECK: .extern .const .b8 array_constant_double[80];
     55 @array_constant_double = external addrspace(1) constant [10 x double]  ; addrspace(1) is expected in the .const state space
     56 
     57 ;CHECK: .extern .local .b8 array_local_double[80];
     58 @array_local_double = external addrspace(2) global [10 x double]  ; addrspace(2) is expected in the .local state space
     59 
     60 ;CHECK: .extern .shared .b8 array_shared_double[80];
     61 @array_shared_double = external addrspace(4) global [10 x double]  ; addrspace(4) is expected in the .shared state space
     62 
     63 
     64 define ptx_device i16 @t1_u16(i16* %p) {  ; t1: load an i16 straight through the pointer argument
     65 entry:
     66 ;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}];
     67 ;CHECK-NEXT: ret;
     68   %x = load i16* %p  ; %p has no addrspace qualifier; expected PTX is ld.global.u16 into an rh register with no address offset
     69   ret i16 %x  ; expected PTX has ret immediately after the load
     70 }
     71 
     72 define ptx_device i32 @t1_u32(i32* %p) {  ; t1: load an i32 straight through the pointer argument
     73 entry:
     74 ;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}];
     75 ;CHECK-NEXT: ret;
     76   %x = load i32* %p  ; %p has no addrspace qualifier; expected PTX is ld.global.u32 into an r register with no address offset
     77   ret i32 %x  ; expected PTX has ret immediately after the load
     78 }
     79 
     80 define ptx_device i64 @t1_u64(i64* %p) {  ; t1: load an i64 straight through the pointer argument
     81 entry:
     82 ;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}];
     83 ;CHECK-NEXT: ret;
     84   %x = load i64* %p  ; expected PTX is ld.global.u64 into an rd register; the address stays in an r register
     85   ret i64 %x  ; expected PTX has ret immediately after the load
     86 }
     87 
     88 define ptx_device float @t1_f32(float* %p) {  ; t1: load a float straight through the pointer argument
     89 entry:
     90 ;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}];
     91 ;CHECK-NEXT: ret;
     92   %x = load float* %p  ; expected PTX is ld.global.f32 into an r register with no address offset
     93   ret float %x  ; expected PTX has ret immediately after the load
     94 }
     95 
     96 define ptx_device double @t1_f64(double* %p) {  ; t1: load a double straight through the pointer argument
     97 entry:
     98 ;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}];
     99 ;CHECK-NEXT: ret;
    100   %x = load double* %p  ; expected PTX is ld.global.f64 into an rd register; the address stays in an r register
    101   ret double %x  ; expected PTX has ret immediately after the load
    102 }
    103 
    104 define ptx_device i16 @t2_u16(i16* %p) {  ; t2: load the i16 at constant index 1 from the pointer argument
    105 entry:
    106 ;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
    107 ;CHECK-NEXT: ret;
    108   %i = getelementptr i16* %p, i32 1  ; index 1 of i16 is byte offset 2; expected to appear as the +2 in the load's address operand
    109   %x = load i16* %i  ; expected PTX is a single ld.global.u16 into an rh register
    110   ret i16 %x  ; expected PTX has ret immediately after the load
    111 }
    112 
    113 define ptx_device i32 @t2_u32(i32* %p) {  ; t2: load the i32 at constant index 1 from the pointer argument
    114 entry:
    115 ;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
    116 ;CHECK-NEXT: ret;
    117   %i = getelementptr i32* %p, i32 1  ; index 1 of i32 is byte offset 4; expected to appear as the +4 in the load's address operand
    118   %x = load i32* %i  ; expected PTX is a single ld.global.u32 into an r register
    119   ret i32 %x  ; expected PTX has ret immediately after the load
    120 }
    121 
    122 define ptx_device i64 @t2_u64(i64* %p) {  ; t2: load the i64 at constant index 1 from the pointer argument
    123 entry:
    124 ;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
    125 ;CHECK-NEXT: ret;
    126   %i = getelementptr i64* %p, i32 1  ; index 1 of i64 is byte offset 8; expected to appear as the +8 in the load's address operand
    127   %x = load i64* %i  ; expected PTX is a single ld.global.u64 into an rd register
    128   ret i64 %x  ; expected PTX has ret immediately after the load
    129 }
    130 
    131 define ptx_device float @t2_f32(float* %p) {  ; t2: load the float at constant index 1 from the pointer argument
    132 entry:
    133 ;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
    134 ;CHECK-NEXT: ret;
    135   %i = getelementptr float* %p, i32 1  ; index 1 of float is byte offset 4; expected to appear as the +4 in the load's address operand
    136   %x = load float* %i  ; expected PTX is a single ld.global.f32 into an r register
    137   ret float %x  ; expected PTX has ret immediately after the load
    138 }
    139 
    140 define ptx_device double @t2_f64(double* %p) {  ; t2: load the double at constant index 1 from the pointer argument
    141 entry:
    142 ;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
    143 ;CHECK-NEXT: ret;
    144   %i = getelementptr double* %p, i32 1  ; index 1 of double is byte offset 8; expected to appear as the +8 in the load's address operand
    145   %x = load double* %i  ; expected PTX is a single ld.global.f64 into an rd register
    146   ret double %x  ; expected PTX has ret immediately after the load
    147 }
    148 
    149 define ptx_device i16 @t3_u16(i16* %p, i32 %q) {  ; t3: load the i16 at runtime index %q from the pointer argument
    150 entry:
    151 ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
    152 ;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
    153 ;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
    154   %i = getelementptr i16* %p, i32 %q  ; expected PTX: shift left by 1 (x2 bytes), then an add whose other operand is presumably the base pointer; one register is reused for shift result, sum and load address
    155   %x = load i16* %i  ; expected PTX is ld.global.u16 into an rh register from the computed address, no immediate offset
    156   ret i16 %x  ; NOTE(review): unlike the t1_/t2_ tests in this file, the expected output here does not pin the trailing ret - confirm this is intentional
    157 }
    158 
    159 define ptx_device i32 @t3_u32(i32* %p, i32 %q) {  ; t3: load the i32 at runtime index %q from the pointer argument
    160 entry:
    161 ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
    162 ;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
    163 ;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
    164   %i = getelementptr i32* %p, i32 %q  ; expected PTX: shift left by 2 (x4 bytes), then an add whose other operand is presumably the base pointer; one register is reused for shift result, sum and load address
    165   %x = load i32* %i  ; expected PTX is ld.global.u32 into an r register from the computed address, no immediate offset
    166   ret i32 %x  ; NOTE(review): unlike the t1_/t2_ tests in this file, the expected output here does not pin the trailing ret - confirm this is intentional
    167 }
    168 
    169 define ptx_device i64 @t3_u64(i64* %p, i32 %q) {  ; t3: load the i64 at runtime index %q from the pointer argument
    170 entry:
    171 ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
    172 ;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
    173 ;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
    174   %i = getelementptr i64* %p, i32 %q  ; expected PTX: shift left by 3 (x8 bytes), then an add whose other operand is presumably the base pointer; one register is reused for shift result, sum and load address
    175   %x = load i64* %i  ; expected PTX is ld.global.u64 into an rd register from the computed address, no immediate offset
    176   ret i64 %x  ; NOTE(review): unlike the t1_/t2_ tests in this file, the expected output here does not pin the trailing ret - confirm this is intentional
    177 }
    178 
    179 define ptx_device float @t3_f32(float* %p, i32 %q) {  ; t3: load the float at runtime index %q from the pointer argument
    180 entry:
    181 ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
    182 ;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
    183 ;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
    184   %i = getelementptr float* %p, i32 %q  ; expected PTX: shift left by 2 (x4 bytes), then an add whose other operand is presumably the base pointer; one register is reused for shift result, sum and load address
    185   %x = load float* %i  ; expected PTX is ld.global.f32 into an r register from the computed address, no immediate offset
    186   ret float %x  ; NOTE(review): unlike the t1_/t2_ tests in this file, the expected output here does not pin the trailing ret - confirm this is intentional
    187 }
    188 
    189 define ptx_device double @t3_f64(double* %p, i32 %q) {  ; t3: load the double at runtime index %q from the pointer argument
    190 entry:
    191 ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
    192 ;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
    193 ;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
    194   %i = getelementptr double* %p, i32 %q  ; expected PTX: shift left by 3 (x8 bytes), then an add whose other operand is presumably the base pointer; one register is reused for shift result, sum and load address
    195   %x = load double* %i  ; expected PTX is ld.global.f64 into an rd register from the computed address, no immediate offset
    196   ret double %x  ; NOTE(review): unlike the t1_/t2_ tests in this file, the expected output here does not pin the trailing ret - confirm this is intentional
    197 }
    198 
    199 define ptx_device i16 @t4_global_u16() {  ; t4 (global): load element 0 of the external array @array_i16
    200 entry:
    201 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
    202 ;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
    203 ;CHECK-NEXT: ret;
    204   %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    205   %x = load i16* %i  ; expected PTX is ld.global.u16 into an rh register from that register, no offset
    206   ret i16 %x  ; expected PTX has ret immediately after the load
    207 }
    208 
    209 define ptx_device i32 @t4_global_u32() {  ; t4 (global): load element 0 of the external array @array_i32
    210 entry:
    211 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
    212 ;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
    213 ;CHECK-NEXT: ret;
    214   %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    215   %x = load i32* %i  ; expected PTX is ld.global.u32 into an r register from that register, no offset
    216   ret i32 %x  ; expected PTX has ret immediately after the load
    217 }
    218 
    219 define ptx_device i64 @t4_global_u64() {  ; t4 (global): load element 0 of the external array @array_i64
    220 entry:
    221 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
    222 ;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
    223 ;CHECK-NEXT: ret;
    224   %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    225   %x = load i64* %i  ; expected PTX is ld.global.u64 into an rd register from that register, no offset
    226   ret i64 %x  ; expected PTX has ret immediately after the load
    227 }
    228 
    229 define ptx_device float @t4_global_f32() {  ; t4 (global): load element 0 of the external array @array_float
    230 entry:
    231 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
    232 ;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
    233 ;CHECK-NEXT: ret;
    234   %i = getelementptr [10 x float]* @array_float, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    235   %x = load float* %i  ; expected PTX is ld.global.f32 into an r register from that register, no offset
    236   ret float %x  ; expected PTX has ret immediately after the load
    237 }
    238 
    239 define ptx_device double @t4_global_f64() {  ; t4 (global): load element 0 of the external array @array_double
    240 entry:
    241 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
    242 ;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
    243 ;CHECK-NEXT: ret;
    244   %i = getelementptr [10 x double]* @array_double, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    245   %x = load double* %i  ; expected PTX is ld.global.f64 into an rd register from that register, no offset
    246   ret double %x  ; expected PTX has ret immediately after the load
    247 }
    248 
    249 define ptx_device i16 @t4_const_u16() {  ; t4 (const): load element 0 of the addrspace(1) array @array_constant_i16
    250 entry:
    251 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16;
    252 ;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]];
    253 ;CHECK-NEXT: ret;
    254   %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    255   %x = load i16 addrspace(1)* %i  ; addrspace(1) pointer: expected PTX is ld.const.u16 into an rh register, no offset
    256   ret i16 %x  ; expected PTX has ret immediately after the load
    257 }
    258 
    259 define ptx_device i32 @t4_const_u32() {  ; t4 (const): load element 0 of the addrspace(1) array @array_constant_i32
    260 entry:
    261 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32;
    262 ;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]];
    263 ;CHECK-NEXT: ret;
    264   %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    265   %x = load i32 addrspace(1)* %i  ; addrspace(1) pointer: expected PTX is ld.const.u32 into an r register, no offset
    266   ret i32 %x  ; expected PTX has ret immediately after the load
    267 }
    268 
    269 define ptx_device i64 @t4_const_u64() {  ; t4 (const): load element 0 of the addrspace(1) array @array_constant_i64
    270 entry:
    271 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64;
    272 ;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]];
    273 ;CHECK-NEXT: ret;
    274   %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    275   %x = load i64 addrspace(1)* %i  ; addrspace(1) pointer: expected PTX is ld.const.u64 into an rd register, no offset
    276   ret i64 %x  ; expected PTX has ret immediately after the load
    277 }
    278 
    279 define ptx_device float @t4_const_f32() {  ; t4 (const): load element 0 of the addrspace(1) array @array_constant_float
    280 entry:
    281 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float;
    282 ;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]];
    283 ;CHECK-NEXT: ret;
    284   %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    285   %x = load float addrspace(1)* %i  ; addrspace(1) pointer: expected PTX is ld.const.f32 into an r register, no offset
    286   ret float %x  ; expected PTX has ret immediately after the load
    287 }
    288 
    289 define ptx_device double @t4_const_f64() {  ; t4 (const): load element 0 of the addrspace(1) array @array_constant_double
    290 entry:
    291 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double;
    292 ;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]];
    293 ;CHECK-NEXT: ret;
    294   %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    295   %x = load double addrspace(1)* %i  ; addrspace(1) pointer: expected PTX is ld.const.f64 into an rd register, no offset
    296   ret double %x  ; expected PTX has ret immediately after the load
    297 }
    298 
    299 define ptx_device i16 @t4_local_u16() {  ; t4 (local): load element 0 of the addrspace(2) array @array_local_i16
    300 entry:
    301 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
    302 ;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]];
    303 ;CHECK-NEXT: ret;
    304   %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    305   %x = load i16 addrspace(2)* %i  ; addrspace(2) pointer: expected PTX is ld.local.u16 into an rh register, no offset
    306   ret i16 %x  ; expected PTX has ret immediately after the load
    307 }
    308 
    309 define ptx_device i32 @t4_local_u32() {  ; t4 (local): load element 0 of the addrspace(2) array @array_local_i32
    310 entry:
    311 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
    312 ;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]];
    313 ;CHECK-NEXT: ret;
    314   %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    315   %x = load i32 addrspace(2)* %i  ; addrspace(2) pointer: expected PTX is ld.local.u32 into an r register, no offset
    316   ret i32 %x  ; expected PTX has ret immediately after the load
    317 }
    318 
    319 define ptx_device i64 @t4_local_u64() {  ; t4 (local): load element 0 of the addrspace(2) array @array_local_i64
    320 entry:
    321 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
    322 ;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]];
    323 ;CHECK-NEXT: ret;
    324   %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    325   %x = load i64 addrspace(2)* %i  ; addrspace(2) pointer: expected PTX is ld.local.u64 into an rd register, no offset
    326   ret i64 %x  ; expected PTX has ret immediately after the load
    327 }
    328 
    329 define ptx_device float @t4_local_f32() {  ; t4 (local): load element 0 of the addrspace(2) array @array_local_float
    330 entry:
    331 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
    332 ;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]];
    333 ;CHECK-NEXT: ret;
    334   %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    335   %x = load float addrspace(2)* %i  ; addrspace(2) pointer: expected PTX is ld.local.f32 into an r register, no offset
    336   ret float %x  ; expected PTX has ret immediately after the load
    337 }
    338 
    339 define ptx_device double @t4_local_f64() {  ; t4 (local): load element 0 of the addrspace(2) array @array_local_double
    340 entry:
    341 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
    342 ;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]];
    343 ;CHECK-NEXT: ret;
    344   %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    345   %x = load double addrspace(2)* %i  ; addrspace(2) pointer: expected PTX is ld.local.f64 into an rd register, no offset
    346   ret double %x  ; expected PTX has ret immediately after the load
    347 }
    348 
    349 define ptx_device i16 @t4_shared_u16() {  ; t4 (shared): load element 0 of the addrspace(4) array @array_shared_i16
    350 entry:
    351 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
    352 ;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]];
    353 ;CHECK-NEXT: ret;
    354   %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    355   %x = load i16 addrspace(4)* %i  ; addrspace(4) pointer: expected PTX is ld.shared.u16 into an rh register, no offset
    356   ret i16 %x  ; expected PTX has ret immediately after the load
    357 }
    358 
    359 define ptx_device i32 @t4_shared_u32() {  ; t4 (shared): load element 0 of the addrspace(4) array @array_shared_i32
    360 entry:
    361 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
    362 ;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]];
    363 ;CHECK-NEXT: ret;
    364   %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    365   %x = load i32 addrspace(4)* %i  ; addrspace(4) pointer: expected PTX is ld.shared.u32 into an r register, no offset
    366   ret i32 %x  ; expected PTX has ret immediately after the load
    367 }
    368 
    369 define ptx_device i64 @t4_shared_u64() {  ; t4 (shared): load element 0 of the addrspace(4) array @array_shared_i64
    370 entry:
    371 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
    372 ;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]];
    373 ;CHECK-NEXT: ret;
    374   %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    375   %x = load i64 addrspace(4)* %i  ; addrspace(4) pointer: expected PTX is ld.shared.u64 into an rd register, no offset
    376   ret i64 %x  ; expected PTX has ret immediately after the load
    377 }
    378 
    379 define ptx_device float @t4_shared_f32() {  ; t4 (shared): load element 0 of the addrspace(4) array @array_shared_float
    380 entry:
    381 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
    382 ;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]];
    383 ;CHECK-NEXT: ret;
    384   %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    385   %x = load float addrspace(4)* %i  ; addrspace(4) pointer: expected PTX is ld.shared.f32 into an r register, no offset
    386   ret float %x  ; expected PTX has ret immediately after the load
    387 }
    388 
    389 define ptx_device double @t4_shared_f64() {  ; t4 (shared): load element 0 of the addrspace(4) array @array_shared_double
    390 entry:
    391 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
    392 ;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]];
    393 ;CHECK-NEXT: ret;
    394   %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0  ; address of element 0; expected PTX moves the symbol into an r register with mov.u32
    395   %x = load double addrspace(4)* %i  ; addrspace(4) pointer: expected PTX is ld.shared.f64 into an rd register, no offset
    396   ret double %x  ; expected PTX has ret immediately after the load
    397 }
    398 
    399 define ptx_device i16 @t5_u16() {  ; t5: load element 1 of the external array @array_i16
    400 entry:
    401 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
    402 ;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2];
    403 ;CHECK-NEXT: ret;
    404   %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1  ; element 1 is byte offset 2; expected PTX moves the bare symbol into a register and keeps the +2 in the load's address operand
    405   %x = load i16* %i  ; expected PTX is ld.global.u16 into an rh register
    406   ret i16 %x  ; expected PTX has ret immediately after the load
    407 }
    408 
    409 define ptx_device i32 @t5_u32() {  ; t5: load element 1 of the external array @array_i32
    410 entry:
    411 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
    412 ;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4];
    413 ;CHECK-NEXT: ret;
    414   %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1  ; element 1 is byte offset 4; expected PTX moves the bare symbol into a register and keeps the +4 in the load's address operand
    415   %x = load i32* %i  ; expected PTX is ld.global.u32 into an r register
    416   ret i32 %x  ; expected PTX has ret immediately after the load
    417 }
    418 
    419 define ptx_device i64 @t5_u64() {  ; t5: load element 1 of the external array @array_i64
    420 entry:
    421 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
    422 ;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8];
    423 ;CHECK-NEXT: ret;
    424   %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1  ; element 1 is byte offset 8; expected PTX moves the bare symbol into a register and keeps the +8 in the load's address operand
    425   %x = load i64* %i  ; expected PTX is ld.global.u64 into an rd register
    426   ret i64 %x  ; expected PTX has ret immediately after the load
    427 }
    428 
    429 define ptx_device float @t5_f32() {  ; t5: load element 1 of the external array @array_float
    430 entry:
    431 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
    432 ;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4];
    433 ;CHECK-NEXT: ret;
    434   %i = getelementptr [10 x float]* @array_float, i32 0, i32 1  ; element 1 is byte offset 4; expected PTX moves the bare symbol into a register and keeps the +4 in the load's address operand
    435   %x = load float* %i  ; expected PTX is ld.global.f32 into an r register
    436   ret float %x  ; expected PTX has ret immediately after the load
    437 }
    438 
    439 define ptx_device double @t5_f64() {  ; t5: load element 1 of the external array @array_double
    440 entry:
    441 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
    442 ;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8];
    443 ;CHECK-NEXT: ret;
    444   %i = getelementptr [10 x double]* @array_double, i32 0, i32 1  ; element 1 is byte offset 8; expected PTX moves the bare symbol into a register and keeps the +8 in the load's address operand
    445   %x = load double* %i  ; expected PTX is ld.global.f64 into an rd register
    446   ret double %x  ; expected PTX has ret immediately after the load
    447 }
    448