Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
      2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
      3 
      4 
      5 ;; i8
      6 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
      7 ; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
      8 ; PTX32: ret
      9 ; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     10 ; PTX64: ret
     11   %a = load i8, i8 addrspace(1)* %ptr
     12   ret i8 %a
     13 }
     14 
     15 define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
     16 ; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     17 ; PTX32: ret
     18 ; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     19 ; PTX64: ret
     20   %a = load i8, i8 addrspace(3)* %ptr
     21   ret i8 %a
     22 }
     23 
     24 define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
     25 ; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     26 ; PTX32: ret
     27 ; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     28 ; PTX64: ret
     29   %a = load i8, i8 addrspace(5)* %ptr
     30   ret i8 %a
     31 }
     32 
     33 ;; i16
     34 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
     35 ; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     36 ; PTX32: ret
     37 ; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     38 ; PTX64: ret
     39   %a = load i16, i16 addrspace(1)* %ptr
     40   ret i16 %a
     41 }
     42 
     43 define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
     44 ; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     45 ; PTX32: ret
     46 ; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     47 ; PTX64: ret
     48   %a = load i16, i16 addrspace(3)* %ptr
     49   ret i16 %a
     50 }
     51 
     52 define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
     53 ; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     54 ; PTX32: ret
     55 ; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     56 ; PTX64: ret
     57   %a = load i16, i16 addrspace(5)* %ptr
     58   ret i16 %a
     59 }
     60 
     61 ;; i32
     62 define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
     63 ; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     64 ; PTX32: ret
     65 ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     66 ; PTX64: ret
     67   %a = load i32, i32 addrspace(1)* %ptr
     68   ret i32 %a
     69 }
     70 
     71 define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
     72 ; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     73 ; PTX32: ret
     74 ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     75 ; PTX64: ret
     76   %a = load i32, i32 addrspace(3)* %ptr
     77   ret i32 %a
     78 }
     79 
     80 define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
     81 ; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     82 ; PTX32: ret
     83 ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
     84 ; PTX64: ret
     85   %a = load i32, i32 addrspace(5)* %ptr
     86   ret i32 %a
     87 }
     88 
     89 ;; i64
     90 define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
     91 ; PTX32: ld.global.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
     92 ; PTX32: ret
     93 ; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
     94 ; PTX64: ret
     95   %a = load i64, i64 addrspace(1)* %ptr
     96   ret i64 %a
     97 }
     98 
     99 define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
    100 ; PTX32: ld.shared.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
    101 ; PTX32: ret
    102 ; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
    103 ; PTX64: ret
    104   %a = load i64, i64 addrspace(3)* %ptr
    105   ret i64 %a
    106 }
    107 
    108 define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
    109 ; PTX32: ld.local.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
    110 ; PTX32: ret
    111 ; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
    112 ; PTX64: ret
    113   %a = load i64, i64 addrspace(5)* %ptr
    114   ret i64 %a
    115 }
    116 
    117 ;; f32
    118 define float @ld_global_f32(float addrspace(1)* %ptr) {
    119 ; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
    120 ; PTX32: ret
    121 ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
    122 ; PTX64: ret
    123   %a = load float, float addrspace(1)* %ptr
    124   ret float %a
    125 }
    126 
    127 define float @ld_shared_f32(float addrspace(3)* %ptr) {
    128 ; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
    129 ; PTX32: ret
    130 ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
    131 ; PTX64: ret
    132   %a = load float, float addrspace(3)* %ptr
    133   ret float %a
    134 }
    135 
    136 define float @ld_local_f32(float addrspace(5)* %ptr) {
    137 ; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
    138 ; PTX32: ret
    139 ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
    140 ; PTX64: ret
    141   %a = load float, float addrspace(5)* %ptr
    142   ret float %a
    143 }
    144 
    145 ;; f64
    146 define double @ld_global_f64(double addrspace(1)* %ptr) {
    147 ; PTX32: ld.global.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
    148 ; PTX32: ret
    149 ; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
    150 ; PTX64: ret
    151   %a = load double, double addrspace(1)* %ptr
    152   ret double %a
    153 }
    154 
    155 define double @ld_shared_f64(double addrspace(3)* %ptr) {
    156 ; PTX32: ld.shared.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
    157 ; PTX32: ret
    158 ; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
    159 ; PTX64: ret
    160   %a = load double, double addrspace(3)* %ptr
    161   ret double %a
    162 }
    163 
    164 define double @ld_local_f64(double addrspace(5)* %ptr) {
    165 ; PTX32: ld.local.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
    166 ; PTX32: ret
    167 ; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
    168 ; PTX64: ret
    169   %a = load double, double addrspace(5)* %ptr
    170   ret double %a
    171 }
    172