Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefix=SM20 %s
      2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck -check-prefix=SM35 %s
      3 
      4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; leading "e" = little-endian; "p:64:64:64" = 64-bit pointers
      5 target triple = "nvptx64-unknown-unknown" ; 64-bit NVPTX, the same target the llc invocations above select with -march=nvptx64
      6 
      7 ; SM20-LABEL: .visible .entry foo1(
      8 ; SM20: ld.global.f32
      9 ; SM35-LABEL: .visible .entry foo1(
     10 ; SM35: ld.global.nc.f32
     11 define void @foo1(float* noalias readonly %from, float* %to) { ; kernel: copies one float out of a noalias, readonly source
     12   %val = load float, float* %from ; checks above expect ld.global.nc.f32 on sm_35 and plain ld.global.f32 on sm_20
     13   store float %val, float* %to
     14   ret void
     15 }
     16 
     17 ; SM20-LABEL: .visible .entry foo2(
     18 ; SM20: ld.global.f64
     19 ; SM35-LABEL: .visible .entry foo2(
     20 ; SM35: ld.global.nc.f64
     21 define void @foo2(double* noalias readonly %from, double* %to) { ; kernel: same copy as @foo1, but for double
     22   %val = load double, double* %from ; checked as ld.global.nc.f64 (sm_35) / ld.global.f64 (sm_20)
     23   store double %val, double* %to
     24   ret void
     25 }
     26 
     27 ; SM20-LABEL: .visible .entry foo3(
     28 ; SM20: ld.global.u16
     29 ; SM35-LABEL: .visible .entry foo3(
     30 ; SM35: ld.global.nc.u16
     31 define void @foo3(i16* noalias readonly %from, i16* %to) { ; kernel: 16-bit integer copy
     32   %val = load i16, i16* %from ; checked as ld.global.nc.u16 (sm_35) / ld.global.u16 (sm_20)
     33   store i16 %val, i16* %to
     34   ret void
     35 }
     36 
     37 ; SM20-LABEL: .visible .entry foo4(
     38 ; SM20: ld.global.u32
     39 ; SM35-LABEL: .visible .entry foo4(
     40 ; SM35: ld.global.nc.u32
     41 define void @foo4(i32* noalias readonly %from, i32* %to) { ; kernel: 32-bit integer copy
     42   %val = load i32, i32* %from ; checked as ld.global.nc.u32 (sm_35) / ld.global.u32 (sm_20)
     43   store i32 %val, i32* %to
     44   ret void
     45 }
     46 
     47 ; SM20-LABEL: .visible .entry foo5(
     48 ; SM20: ld.global.u64
     49 ; SM35-LABEL: .visible .entry foo5(
     50 ; SM35: ld.global.nc.u64
     51 define void @foo5(i64* noalias readonly %from, i64* %to) { ; kernel: 64-bit integer copy
     52   %val = load i64, i64* %from ; checked as ld.global.nc.u64 (sm_35) / ld.global.u64 (sm_20)
     53   store i64 %val, i64* %to
     54   ret void
     55 }
     56 
     57 ; i128 is a non-standard integer type in nvptx64; the checks expect two 64-bit loads
     58 ; SM20-LABEL: .visible .entry foo6(
     59 ; SM20: ld.global.u64
     60 ; SM20: ld.global.u64
     61 ; SM35-LABEL: .visible .entry foo6(
     62 ; SM35: ld.global.nc.u64
     63 ; SM35: ld.global.nc.u64
     64 define void @foo6(i128* noalias readonly %from, i128* %to) { ; kernel: 128-bit integer copy
     65   %wide = load i128, i128* %from ; the checks above look for two u64 loads (.nc on sm_35) for this one IR load
     66   store i128 %wide, i128* %to
     67   ret void
     68 }
     69 
     70 ; SM20-LABEL: .visible .entry foo7(
     71 ; SM20: ld.global.v2.u8
     72 ; SM35-LABEL: .visible .entry foo7(
     73 ; SM35: ld.global.nc.v2.u8
     74 define void @foo7(<2 x i8>* noalias readonly %from, <2 x i8>* %to) { ; kernel: copies a 2-element i8 vector
     75   %vec = load <2 x i8>, <2 x i8>* %from ; checked as ld.global.nc.v2.u8 (sm_35) / ld.global.v2.u8 (sm_20)
     76   store <2 x i8> %vec, <2 x i8>* %to
     77   ret void
     78 }
     79 
     80 ; SM20-LABEL: .visible .entry foo8(
     81 ; SM20: ld.global.v2.u16
     82 ; SM35-LABEL: .visible .entry foo8(
     83 ; SM35: ld.global.nc.v2.u16
     84 define void @foo8(<2 x i16>* noalias readonly %from, <2 x i16>* %to) { ; kernel: copies a 2-element i16 vector
     85   %vec = load <2 x i16>, <2 x i16>* %from ; checked as ld.global.nc.v2.u16 (sm_35) / ld.global.v2.u16 (sm_20)
     86   store <2 x i16> %vec, <2 x i16>* %to
     87   ret void
     88 }
     89 
     90 ; SM20-LABEL: .visible .entry foo9(
     91 ; SM20: ld.global.v2.u32
     92 ; SM35-LABEL: .visible .entry foo9(
     93 ; SM35: ld.global.nc.v2.u32
     94 define void @foo9(<2 x i32>* noalias readonly %from, <2 x i32>* %to) { ; kernel: copies a 2-element i32 vector
     95   %vec = load <2 x i32>, <2 x i32>* %from ; checked as ld.global.nc.v2.u32 (sm_35) / ld.global.v2.u32 (sm_20)
     96   store <2 x i32> %vec, <2 x i32>* %to
     97   ret void
     98 }
     99 
    100 ; SM20-LABEL: .visible .entry foo10(
    101 ; SM20: ld.global.v2.u64
    102 ; SM35-LABEL: .visible .entry foo10(
    103 ; SM35: ld.global.nc.v2.u64
    104 define void @foo10(<2 x i64>* noalias readonly %from, <2 x i64>* %to) { ; kernel: copies a 2-element i64 vector
    105   %vec = load <2 x i64>, <2 x i64>* %from ; checked as ld.global.nc.v2.u64 (sm_35) / ld.global.v2.u64 (sm_20)
    106   store <2 x i64> %vec, <2 x i64>* %to
    107   ret void
    108 }
    109 
    110 ; SM20-LABEL: .visible .entry foo11(
    111 ; SM20: ld.global.v2.f32
    112 ; SM35-LABEL: .visible .entry foo11(
    113 ; SM35: ld.global.nc.v2.f32
    114 define void @foo11(<2 x float>* noalias readonly %from, <2 x float>* %to) { ; kernel: copies a 2-element float vector
    115   %vec = load <2 x float>, <2 x float>* %from ; checked as ld.global.nc.v2.f32 (sm_35) / ld.global.v2.f32 (sm_20)
    116   store <2 x float> %vec, <2 x float>* %to
    117   ret void
    118 }
    119 
    120 ; SM20-LABEL: .visible .entry foo12(
    121 ; SM20: ld.global.v2.f64
    122 ; SM35-LABEL: .visible .entry foo12(
    123 ; SM35: ld.global.nc.v2.f64
    124 define void @foo12(<2 x double>* noalias readonly %from, <2 x double>* %to) { ; kernel: copies a 2-element double vector
    125   %vec = load <2 x double>, <2 x double>* %from ; checked as ld.global.nc.v2.f64 (sm_35) / ld.global.v2.f64 (sm_20)
    126   store <2 x double> %vec, <2 x double>* %to
    127   ret void
    128 }
    129 
    130 ; SM20-LABEL: .visible .entry foo13(
    131 ; SM20: ld.global.v4.u8
    132 ; SM35-LABEL: .visible .entry foo13(
    133 ; SM35: ld.global.nc.v4.u8
    134 define void @foo13(<4 x i8>* noalias readonly %from, <4 x i8>* %to) { ; kernel: copies a 4-element i8 vector
    135   %vec = load <4 x i8>, <4 x i8>* %from ; checked as ld.global.nc.v4.u8 (sm_35) / ld.global.v4.u8 (sm_20)
    136   store <4 x i8> %vec, <4 x i8>* %to
    137   ret void
    138 }
    139 
    140 ; SM20-LABEL: .visible .entry foo14(
    141 ; SM20: ld.global.v4.u16
    142 ; SM35-LABEL: .visible .entry foo14(
    143 ; SM35: ld.global.nc.v4.u16
    144 define void @foo14(<4 x i16>* noalias readonly %from, <4 x i16>* %to) { ; kernel: copies a 4-element i16 vector
    145   %vec = load <4 x i16>, <4 x i16>* %from ; checked as ld.global.nc.v4.u16 (sm_35) / ld.global.v4.u16 (sm_20)
    146   store <4 x i16> %vec, <4 x i16>* %to
    147   ret void
    148 }
    149 
    150 ; SM20-LABEL: .visible .entry foo15(
    151 ; SM20: ld.global.v4.u32
    152 ; SM35-LABEL: .visible .entry foo15(
    153 ; SM35: ld.global.nc.v4.u32
    154 define void @foo15(<4 x i32>* noalias readonly %from, <4 x i32>* %to) { ; kernel: copies a 4-element i32 vector
    155   %vec = load <4 x i32>, <4 x i32>* %from ; checked as ld.global.nc.v4.u32 (sm_35) / ld.global.v4.u32 (sm_20)
    156   store <4 x i32> %vec, <4 x i32>* %to
    157   ret void
    158 }
    159 
    160 ; SM20-LABEL: .visible .entry foo16(
    161 ; SM20: ld.global.v4.f32
    162 ; SM35-LABEL: .visible .entry foo16(
    163 ; SM35: ld.global.nc.v4.f32
    164 define void @foo16(<4 x float>* noalias readonly %from, <4 x float>* %to) { ; kernel: copies a 4-element float vector
    165   %vec = load <4 x float>, <4 x float>* %from ; checked as ld.global.nc.v4.f32 (sm_35) / ld.global.v4.f32 (sm_20)
    166   store <4 x float> %vec, <4 x float>* %to
    167   ret void
    168 }
    169 
    170 ; SM20-LABEL: .visible .entry foo17(
    171 ; SM20: ld.global.v2.f64
    172 ; SM20: ld.global.v2.f64
    173 ; SM35-LABEL: .visible .entry foo17(
    174 ; SM35: ld.global.nc.v2.f64
    175 ; SM35: ld.global.nc.v2.f64
    176 define void @foo17(<4 x double>* noalias readonly %from, <4 x double>* %to) { ; kernel: copies a 4-element double vector
    177   %vec = load <4 x double>, <4 x double>* %from ; the checks above look for two v2.f64 loads (.nc on sm_35) for this one IR load
    178   store <4 x double> %vec, <4 x double>* %to
    179   ret void
    180 }
    181 
    182 ; SM20-LABEL: .visible .entry foo18(
    183 ; SM20: ld.global.u64
    184 ; SM35-LABEL: .visible .entry foo18(
    185 ; SM35: ld.global.nc.u64
    186 define void @foo18(float** noalias readonly %from, float** %to) { ; kernel: the copied value is itself a pointer
    187   %inner = load float*, float** %from ; pointer-typed load, checked as ld.global.nc.u64 (sm_35) / ld.global.u64 (sm_20)
    188   store float* %inner, float** %to
    189   ret void
    190 }
    191 
    192 ; Test that we can infer a cached load for a pointer induction variable.
    193 ; SM20-LABEL: .visible .entry foo19(
    194 ; SM20: ld.global.f32
    195 ; SM35-LABEL: .visible .entry foo19(
    196 ; SM35: ld.global.nc.f32
    197 define void @foo19(float* noalias readonly %from, float* %to, i32 %n) { ; kernel: for n >= 1, sums from[0] .. from[n-1] and stores the total to %to
    198 entry:
    199   br label %loop
    200 
    201 loop:
    202   %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop ] ; element index, starts at 0
    203   %acc = phi float [ 0.0, %entry ], [ %acc.next, %loop ] ; running sum, starts at 0.0
    204   %addr = getelementptr inbounds float, float* %from, i32 %idx ; address is derived from the readonly argument on every iteration
    205   %elt = load float, float* %addr, align 4 ; checked as ld.global.nc.f32 (sm_35) / ld.global.f32 (sm_20)
    206   %acc.next = fadd float %elt, %acc
    207   %idx.next = add nsw i32 %idx, 1
    208   %done = icmp eq i32 %idx.next, %n ; exit test runs after the body, so the body executes at least once
    209   br i1 %done, label %exit, label %loop
    210 
    211 exit:
    212   store float %acc.next, float* %to
    213   ret void
    214 }
    215 
    216 ; This test captures the case of a non-kernel function. In a
    217 ; non-kernel function, without interprocedural analysis, we do not
    218 ; know that the parameter is global. We also do not know that the
    219 ; pointed-to memory is never written to (for the duration of the
    220 ; kernel). For both reasons, we cannot use a cached load here.
    221 ; SM20-LABEL: notkernel(
    222 ; SM20: ld.f32
    223 ; SM35-LABEL: notkernel(
    224 ; SM35: ld.f32
    225 define void @notkernel(float* noalias readonly %from, float* %to) { ; same body as @foo1, but no kernel annotation names this function
    226   %val = load float, float* %from ; both sm_20 and sm_35 are checked for a plain ld.f32 here
    227   store float %val, float* %to
    228   ret void
    229 }
    230 
    231 ; As @notkernel, but with the parameter explicitly marked as global. We still
    232 ; do not know that the pointed-to memory is never written to (for the duration
    233 ; of the kernel). This case does not normally come up, since we do not infer
    234 ; that pointers are global interprocedurally (as of 2015-08-05).
    235 ; SM20-LABEL: notkernel2(
    236 ; SM20: ld.global.f32
    237 ; SM35-LABEL: notkernel2(
    238 ; SM35: ld.global.f32
    239 define void @notkernel2(float addrspace(1)* noalias readonly %from, float* %to) { ; non-kernel; the source pointer is in addrspace(1)
    240   %val = load float, float addrspace(1)* %from ; both sm_20 and sm_35 are checked for ld.global.f32 (no .nc) here
    241   store float %val, float* %to
    242   ret void
    243 }
    244 
    245 !nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19} ; one "kernel" entry each for @foo1 .. @foo19; @notkernel and @notkernel2 are not listed
    246 !1 = !{void (float*, float*)* @foo1, !"kernel", i32 1}
    247 !2 = !{void (double*, double*)* @foo2, !"kernel", i32 1}
    248 !3 = !{void (i16*, i16*)* @foo3, !"kernel", i32 1}
    249 !4 = !{void (i32*, i32*)* @foo4, !"kernel", i32 1}
    250 !5 = !{void (i64*, i64*)* @foo5, !"kernel", i32 1}
    251 !6 = !{void (i128*, i128*)* @foo6, !"kernel", i32 1}
    252 !7 = !{void (<2 x i8>*, <2 x i8>*)* @foo7, !"kernel", i32 1}
    253 !8 = !{void (<2 x i16>*, <2 x i16>*)* @foo8, !"kernel", i32 1}
    254 !9 = !{void (<2 x i32>*, <2 x i32>*)* @foo9, !"kernel", i32 1}
    255 !10 = !{void (<2 x i64>*, <2 x i64>*)* @foo10, !"kernel", i32 1}
    256 !11 = !{void (<2 x float>*, <2 x float>*)* @foo11, !"kernel", i32 1}
    257 !12 = !{void (<2 x double>*, <2 x double>*)* @foo12, !"kernel", i32 1}
    258 !13 = !{void (<4 x i8>*, <4 x i8>*)* @foo13, !"kernel", i32 1}
    259 !14 = !{void (<4 x i16>*, <4 x i16>*)* @foo14, !"kernel", i32 1}
    260 !15 = !{void (<4 x i32>*, <4 x i32>*)* @foo15, !"kernel", i32 1}
    261 !16 = !{void (<4 x float>*, <4 x float>*)* @foo16, !"kernel", i32 1}
    262 !17 = !{void (<4 x double>*, <4 x double>*)* @foo17, !"kernel", i32 1}
    263 !18 = !{void (float**, float**)* @foo18, !"kernel", i32 1}
    264 !19 = !{void (float*, float*, i32)* @foo19, !"kernel", i32 1}
    265