Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
      2 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
      3 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
      4 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
      5 
      6 
      7 ;; i8
      8 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
      9 ; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
     10 ; PTX32: ret
     11 ; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
     12 ; PTX64: ret
     13   %a = load i8 addrspace(1)* %ptr
     14   ret i8 %a
     15 }
     16 
     17 define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
     18 ; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
     19 ; PTX32: ret
     20 ; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
     21 ; PTX64: ret
     22   %a = load i8 addrspace(3)* %ptr
     23   ret i8 %a
     24 }
     25 
     26 define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
     27 ; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
     28 ; PTX32: ret
     29 ; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
     30 ; PTX64: ret
     31   %a = load i8 addrspace(5)* %ptr
     32   ret i8 %a
     33 }
     34 
     35 ;; i16
     36 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
     37 ; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
     38 ; PTX32: ret
     39 ; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
     40 ; PTX64: ret
     41   %a = load i16 addrspace(1)* %ptr
     42   ret i16 %a
     43 }
     44 
     45 define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
     46 ; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
     47 ; PTX32: ret
     48 ; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
     49 ; PTX64: ret
     50   %a = load i16 addrspace(3)* %ptr
     51   ret i16 %a
     52 }
     53 
     54 define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
     55 ; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
     56 ; PTX32: ret
     57 ; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
     58 ; PTX64: ret
     59   %a = load i16 addrspace(5)* %ptr
     60   ret i16 %a
     61 }
     62 
     63 ;; i32
     64 define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
     65 ; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     66 ; PTX32: ret
     67 ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
     68 ; PTX64: ret
     69   %a = load i32 addrspace(1)* %ptr
     70   ret i32 %a
     71 }
     72 
     73 define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
     74 ; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     75 ; PTX32: ret
     76 ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
     77 ; PTX64: ret
     78   %a = load i32 addrspace(3)* %ptr
     79   ret i32 %a
     80 }
     81 
     82 define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
     83 ; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
     84 ; PTX32: ret
     85 ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
     86 ; PTX64: ret
     87   %a = load i32 addrspace(5)* %ptr
     88   ret i32 %a
     89 }
     90 
     91 ;; i64
     92 define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
     93 ; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
     94 ; PTX32: ret
     95 ; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
     96 ; PTX64: ret
     97   %a = load i64 addrspace(1)* %ptr
     98   ret i64 %a
     99 }
    100 
    101 define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
    102 ; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
    103 ; PTX32: ret
    104 ; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
    105 ; PTX64: ret
    106   %a = load i64 addrspace(3)* %ptr
    107   ret i64 %a
    108 }
    109 
    110 define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
    111 ; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
    112 ; PTX32: ret
    113 ; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
    114 ; PTX64: ret
    115   %a = load i64 addrspace(5)* %ptr
    116   ret i64 %a
    117 }
    118 
    119 ;; f32
    120 define float @ld_global_f32(float addrspace(1)* %ptr) {
    121 ; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
    122 ; PTX32: ret
    123 ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
    124 ; PTX64: ret
    125   %a = load float addrspace(1)* %ptr
    126   ret float %a
    127 }
    128 
    129 define float @ld_shared_f32(float addrspace(3)* %ptr) {
    130 ; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
    131 ; PTX32: ret
    132 ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
    133 ; PTX64: ret
    134   %a = load float addrspace(3)* %ptr
    135   ret float %a
    136 }
    137 
    138 define float @ld_local_f32(float addrspace(5)* %ptr) {
    139 ; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
    140 ; PTX32: ret
    141 ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
    142 ; PTX64: ret
    143   %a = load float addrspace(5)* %ptr
    144   ret float %a
    145 }
    146 
    147 ;; f64
    148 define double @ld_global_f64(double addrspace(1)* %ptr) {
    149 ; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
    150 ; PTX32: ret
    151 ; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
    152 ; PTX64: ret
    153   %a = load double addrspace(1)* %ptr
    154   ret double %a
    155 }
    156 
    157 define double @ld_shared_f64(double addrspace(3)* %ptr) {
    158 ; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
    159 ; PTX32: ret
    160 ; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
    161 ; PTX64: ret
    162   %a = load double addrspace(3)* %ptr
    163   ret double %a
    164 }
    165 
    166 define double @ld_local_f64(double addrspace(5)* %ptr) {
    167 ; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
    168 ; PTX32: ret
    169 ; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
    170 ; PTX64: ret
    171   %a = load double addrspace(5)* %ptr
    172   ret double %a
    173 }
    174