Home | History | Annotate | Download | only in CodeGen
      1 // REQUIRES: nvptx-registered-target
      2 // REQUIRES: nvptx64-registered-target
      3 // RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
      4 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
      5 
      6 int read_tid() {
      7 
      8 // CHECK: call i32 @llvm.ptx.read.tid.x()
      9 // CHECK: call i32 @llvm.ptx.read.tid.y()
     10 // CHECK: call i32 @llvm.ptx.read.tid.z()
     11 // CHECK: call i32 @llvm.ptx.read.tid.w()
     12 
     13   int x = __builtin_ptx_read_tid_x();
     14   int y = __builtin_ptx_read_tid_y();
     15   int z = __builtin_ptx_read_tid_z();
     16   int w = __builtin_ptx_read_tid_w();
     17 
     18   return x + y + z + w;
     19 
     20 }
     21 
     22 int read_ntid() {
     23 
     24 // CHECK: call i32 @llvm.ptx.read.ntid.x()
     25 // CHECK: call i32 @llvm.ptx.read.ntid.y()
     26 // CHECK: call i32 @llvm.ptx.read.ntid.z()
     27 // CHECK: call i32 @llvm.ptx.read.ntid.w()
     28 
     29   int x = __builtin_ptx_read_ntid_x();
     30   int y = __builtin_ptx_read_ntid_y();
     31   int z = __builtin_ptx_read_ntid_z();
     32   int w = __builtin_ptx_read_ntid_w();
     33 
     34   return x + y + z + w;
     35 
     36 }
     37 
     38 int read_ctaid() {
     39 
     40 // CHECK: call i32 @llvm.ptx.read.ctaid.x()
     41 // CHECK: call i32 @llvm.ptx.read.ctaid.y()
     42 // CHECK: call i32 @llvm.ptx.read.ctaid.z()
     43 // CHECK: call i32 @llvm.ptx.read.ctaid.w()
     44 
     45   int x = __builtin_ptx_read_ctaid_x();
     46   int y = __builtin_ptx_read_ctaid_y();
     47   int z = __builtin_ptx_read_ctaid_z();
     48   int w = __builtin_ptx_read_ctaid_w();
     49 
     50   return x + y + z + w;
     51 
     52 }
     53 
     54 int read_nctaid() {
     55 
     56 // CHECK: call i32 @llvm.ptx.read.nctaid.x()
     57 // CHECK: call i32 @llvm.ptx.read.nctaid.y()
     58 // CHECK: call i32 @llvm.ptx.read.nctaid.z()
     59 // CHECK: call i32 @llvm.ptx.read.nctaid.w()
     60 
     61   int x = __builtin_ptx_read_nctaid_x();
     62   int y = __builtin_ptx_read_nctaid_y();
     63   int z = __builtin_ptx_read_nctaid_z();
     64   int w = __builtin_ptx_read_nctaid_w();
     65 
     66   return x + y + z + w;
     67 
     68 }
     69 
     70 int read_ids() {
     71 
     72 // CHECK: call i32 @llvm.ptx.read.laneid()
     73 // CHECK: call i32 @llvm.ptx.read.warpid()
     74 // CHECK: call i32 @llvm.ptx.read.nwarpid()
     75 // CHECK: call i32 @llvm.ptx.read.smid()
     76 // CHECK: call i32 @llvm.ptx.read.nsmid()
     77 // CHECK: call i32 @llvm.ptx.read.gridid()
     78 
     79   int a = __builtin_ptx_read_laneid();
     80   int b = __builtin_ptx_read_warpid();
     81   int c = __builtin_ptx_read_nwarpid();
     82   int d = __builtin_ptx_read_smid();
     83   int e = __builtin_ptx_read_nsmid();
     84   int f = __builtin_ptx_read_gridid();
     85 
     86   return a + b + c + d + e + f;
     87 
     88 }
     89 
     90 int read_lanemasks() {
     91 
     92 // CHECK: call i32 @llvm.ptx.read.lanemask.eq()
     93 // CHECK: call i32 @llvm.ptx.read.lanemask.le()
     94 // CHECK: call i32 @llvm.ptx.read.lanemask.lt()
     95 // CHECK: call i32 @llvm.ptx.read.lanemask.ge()
     96 // CHECK: call i32 @llvm.ptx.read.lanemask.gt()
     97 
     98   int a = __builtin_ptx_read_lanemask_eq();
     99   int b = __builtin_ptx_read_lanemask_le();
    100   int c = __builtin_ptx_read_lanemask_lt();
    101   int d = __builtin_ptx_read_lanemask_ge();
    102   int e = __builtin_ptx_read_lanemask_gt();
    103 
    104   return a + b + c + d + e;
    105 
    106 }
    107 
    108 
    109 long read_clocks() {
    110 
    111 // CHECK: call i32 @llvm.ptx.read.clock()
    112 // CHECK: call i64 @llvm.ptx.read.clock64()
    113 
    114   int a = __builtin_ptx_read_clock();
    115   long b = __builtin_ptx_read_clock64();
    116 
    117   return (long)a + b;
    118 
    119 }
    120 
    121 int read_pms() {
    122 
    123 // CHECK: call i32 @llvm.ptx.read.pm0()
    124 // CHECK: call i32 @llvm.ptx.read.pm1()
    125 // CHECK: call i32 @llvm.ptx.read.pm2()
    126 // CHECK: call i32 @llvm.ptx.read.pm3()
    127 
    128   int a = __builtin_ptx_read_pm0();
    129   int b = __builtin_ptx_read_pm1();
    130   int c = __builtin_ptx_read_pm2();
    131   int d = __builtin_ptx_read_pm3();
    132 
    133   return a + b + c + d;
    134 
    135 }
    136 
    137 void sync() {
    138 
    139 // CHECK: call void @llvm.ptx.bar.sync(i32 0)
    140 
    141   __builtin_ptx_bar_sync(0);
    142 
    143 }
    144 
    145 
    146 // NVVM intrinsics
    147 
    148 // The idea is not to test all intrinsics, just that Clang is recognizing the
    149 // builtins defined in BuiltinsNVPTX.def
    150 void nvvm_math(float f1, float f2, double d1, double d2) {
    151 // CHECK: call float @llvm.nvvm.fmax.f
    152   float t1 = __nvvm_fmax_f(f1, f2);
    153 // CHECK: call float @llvm.nvvm.fmin.f
    154   float t2 = __nvvm_fmin_f(f1, f2);
    155 // CHECK: call float @llvm.nvvm.sqrt.rn.f
    156   float t3 = __nvvm_sqrt_rn_f(f1);
    157 // CHECK: call float @llvm.nvvm.rcp.rn.f
    158   float t4 = __nvvm_rcp_rn_f(f2);
    159 
    160 // CHECK: call double @llvm.nvvm.fmax.d
    161   double td1 = __nvvm_fmax_d(d1, d2);
    162 // CHECK: call double @llvm.nvvm.fmin.d
    163   double td2 = __nvvm_fmin_d(d1, d2);
    164 // CHECK: call double @llvm.nvvm.sqrt.rn.d
    165   double td3 = __nvvm_sqrt_rn_d(d1);
    166 // CHECK: call double @llvm.nvvm.rcp.rn.d
    167   double td4 = __nvvm_rcp_rn_d(d2);
    168 }
    169