// REQUIRES: nvptx-registered-target
// REQUIRES: nvptx64-registered-target
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s

// Codegen test for the PTX/NVVM builtins. Every function below calls a small
// group of builtins, and the FileCheck patterns inside it name the LLVM
// intrinsic call expected in the emitted IR for each one. The patterns are
// matched in file order, so keep them in the same order as the calls.
//
// Results are summed and returned (or stored in locals) purely so that each
// call appears as an ordinary expression in the function body.

// Reads the x/y/z/w components through the read_tid builtins.
int read_tid(void) {

// CHECK: call i32 @llvm.ptx.read.tid.x()
// CHECK: call i32 @llvm.ptx.read.tid.y()
// CHECK: call i32 @llvm.ptx.read.tid.z()
// CHECK: call i32 @llvm.ptx.read.tid.w()

  int x = __builtin_ptx_read_tid_x();
  int y = __builtin_ptx_read_tid_y();
  int z = __builtin_ptx_read_tid_z();
  int w = __builtin_ptx_read_tid_w();

  return x + y + z + w;

}

// Reads the x/y/z/w components through the read_ntid builtins.
int read_ntid(void) {

// CHECK: call i32 @llvm.ptx.read.ntid.x()
// CHECK: call i32 @llvm.ptx.read.ntid.y()
// CHECK: call i32 @llvm.ptx.read.ntid.z()
// CHECK: call i32 @llvm.ptx.read.ntid.w()

  int x = __builtin_ptx_read_ntid_x();
  int y = __builtin_ptx_read_ntid_y();
  int z = __builtin_ptx_read_ntid_z();
  int w = __builtin_ptx_read_ntid_w();

  return x + y + z + w;

}

// Reads the x/y/z/w components through the read_ctaid builtins.
int read_ctaid(void) {

// CHECK: call i32 @llvm.ptx.read.ctaid.x()
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
// CHECK: call i32 @llvm.ptx.read.ctaid.z()
// CHECK: call i32 @llvm.ptx.read.ctaid.w()

  int x = __builtin_ptx_read_ctaid_x();
  int y = __builtin_ptx_read_ctaid_y();
  int z = __builtin_ptx_read_ctaid_z();
  int w = __builtin_ptx_read_ctaid_w();

  return x + y + z + w;

}

// Reads the x/y/z/w components through the read_nctaid builtins.
int read_nctaid(void) {

// CHECK: call i32 @llvm.ptx.read.nctaid.x()
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
// CHECK: call i32 @llvm.ptx.read.nctaid.z()
// CHECK: call i32 @llvm.ptx.read.nctaid.w()

  int x = __builtin_ptx_read_nctaid_x();
  int y = __builtin_ptx_read_nctaid_y();
  int z = __builtin_ptx_read_nctaid_z();
  int w = __builtin_ptx_read_nctaid_w();

  return x + y + z + w;

}

// Calls the laneid, warpid, nwarpid, smid, nsmid and gridid read builtins.
int read_ids(void) {

// CHECK: call i32 @llvm.ptx.read.laneid()
// CHECK: call i32 @llvm.ptx.read.warpid()
// CHECK: call i32 @llvm.ptx.read.nwarpid()
// CHECK: call i32 @llvm.ptx.read.smid()
// CHECK: call i32 @llvm.ptx.read.nsmid()
// CHECK: call i32 @llvm.ptx.read.gridid()

  int a = __builtin_ptx_read_laneid();
  int b = __builtin_ptx_read_warpid();
  int c = __builtin_ptx_read_nwarpid();
  int d = __builtin_ptx_read_smid();
  int e = __builtin_ptx_read_nsmid();
  int f = __builtin_ptx_read_gridid();

  return a + b + c + d + e + f;

}

// Calls the eq/le/lt/ge/gt variants of the read_lanemask builtins.
int read_lanemasks(void) {

// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
// CHECK: call i32 @llvm.ptx.read.lanemask.gt()

  int a = __builtin_ptx_read_lanemask_eq();
  int b = __builtin_ptx_read_lanemask_le();
  int c = __builtin_ptx_read_lanemask_lt();
  int d = __builtin_ptx_read_lanemask_ge();
  int e = __builtin_ptx_read_lanemask_gt();

  return a + b + c + d + e;

}


// Calls the clock and clock64 read builtins; the expected intrinsic calls
// return i32 and i64 respectively.
long read_clocks(void) {

// CHECK: call i32 @llvm.ptx.read.clock()
// CHECK: call i64 @llvm.ptx.read.clock64()

  int a = __builtin_ptx_read_clock();
  long b = __builtin_ptx_read_clock64();

  return (long)a + b;

}

// Calls the pm0 through pm3 read builtins.
int read_pms(void) {

// CHECK: call i32 @llvm.ptx.read.pm0()
// CHECK: call i32 @llvm.ptx.read.pm1()
// CHECK: call i32 @llvm.ptx.read.pm2()
// CHECK: call i32 @llvm.ptx.read.pm3()

  int a = __builtin_ptx_read_pm0();
  int b = __builtin_ptx_read_pm1();
  int c = __builtin_ptx_read_pm2();
  int d = __builtin_ptx_read_pm3();

  return a + b + c + d;

}

// Calls the bar_sync builtin with the constant argument 0, which is expected
// to appear as the i32 operand of the intrinsic call.
void sync(void) {

// CHECK: call void @llvm.ptx.bar.sync(i32 0)

  __builtin_ptx_bar_sync(0);

}


// NVVM intrinsics

// The idea is not to test all intrinsics, just that Clang is recognizing the
// builtins defined in BuiltinsNVPTX.def
//
// f1/f2 feed the float builtins and d1/d2 the double builtins; the results
// are stored in otherwise unused locals. The four barrier/membar builtins
// take no arguments.
void nvvm_math(float f1, float f2, double d1, double d2) {
// CHECK: call float @llvm.nvvm.fmax.f
  float t1 = __nvvm_fmax_f(f1, f2);
// CHECK: call float @llvm.nvvm.fmin.f
  float t2 = __nvvm_fmin_f(f1, f2);
// CHECK: call float @llvm.nvvm.sqrt.rn.f
  float t3 = __nvvm_sqrt_rn_f(f1);
// CHECK: call float @llvm.nvvm.rcp.rn.f
  float t4 = __nvvm_rcp_rn_f(f2);

// CHECK: call double @llvm.nvvm.fmax.d
  double td1 = __nvvm_fmax_d(d1, d2);
// CHECK: call double @llvm.nvvm.fmin.d
  double td2 = __nvvm_fmin_d(d1, d2);
// CHECK: call double @llvm.nvvm.sqrt.rn.d
  double td3 = __nvvm_sqrt_rn_d(d1);
// CHECK: call double @llvm.nvvm.rcp.rn.d
  double td4 = __nvvm_rcp_rn_d(d2);

// CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
// CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
// CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
// CHECK: call void @llvm.nvvm.barrier0()
  __nvvm_bar0();
}