Home | History | Annotate | Download | only in CodeGenOpenCL
      1 // REQUIRES: amdgpu-registered-target
      2 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
      3 
      4 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
      5 
      6 typedef unsigned long ulong;
      7 
      8 // CHECK-LABEL: @test_div_scale_f64
      9 // CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
     10 // CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
     11 // CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
     12 // CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
     13 // CHECK: store i32 [[FLAGEXT]]
     14 void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
     15 {
     16   bool flag;
     17   *out = __builtin_amdgcn_div_scale(a, b, true, &flag);
     18   *flagout = flag;
     19 }
     20 
     21 // CHECK-LABEL: @test_div_scale_f32
     22 // CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
     23 // CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
     24 // CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
     25 // CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
     26 // CHECK: store i32 [[FLAGEXT]]
     27 void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
     28 {
     29   bool flag;
     30   *out = __builtin_amdgcn_div_scalef(a, b, true, &flag);
     31   *flagout = flag;
     32 }
     33 
     34 // CHECK-LABEL: @test_div_fmas_f32
     35 // CHECK: call float @llvm.amdgcn.div.fmas.f32
     36 void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
     37 {
     38   *out = __builtin_amdgcn_div_fmasf(a, b, c, d);
     39 }
     40 
     41 // CHECK-LABEL: @test_div_fmas_f64
     42 // CHECK: call double @llvm.amdgcn.div.fmas.f64
     43 void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
     44 {
     45   *out = __builtin_amdgcn_div_fmas(a, b, c, d);
     46 }
     47 
     48 // CHECK-LABEL: @test_div_fixup_f32
     49 // CHECK: call float @llvm.amdgcn.div.fixup.f32
     50 void test_div_fixup_f32(global float* out, float a, float b, float c)
     51 {
     52   *out = __builtin_amdgcn_div_fixupf(a, b, c);
     53 }
     54 
     55 // CHECK-LABEL: @test_div_fixup_f64
     56 // CHECK: call double @llvm.amdgcn.div.fixup.f64
     57 void test_div_fixup_f64(global double* out, double a, double b, double c)
     58 {
     59   *out = __builtin_amdgcn_div_fixup(a, b, c);
     60 }
     61 
     62 // CHECK-LABEL: @test_trig_preop_f32
     63 // CHECK: call float @llvm.amdgcn.trig.preop.f32
     64 void test_trig_preop_f32(global float* out, float a, int b)
     65 {
     66   *out = __builtin_amdgcn_trig_preopf(a, b);
     67 }
     68 
     69 // CHECK-LABEL: @test_trig_preop_f64
     70 // CHECK: call double @llvm.amdgcn.trig.preop.f64
     71 void test_trig_preop_f64(global double* out, double a, int b)
     72 {
     73   *out = __builtin_amdgcn_trig_preop(a, b);
     74 }
     75 
     76 // CHECK-LABEL: @test_rcp_f32
     77 // CHECK: call float @llvm.amdgcn.rcp.f32
     78 void test_rcp_f32(global float* out, float a)
     79 {
     80   *out = __builtin_amdgcn_rcpf(a);
     81 }
     82 
     83 // CHECK-LABEL: @test_rcp_f64
     84 // CHECK: call double @llvm.amdgcn.rcp.f64
     85 void test_rcp_f64(global double* out, double a)
     86 {
     87   *out = __builtin_amdgcn_rcp(a);
     88 }
     89 
     90 // CHECK-LABEL: @test_rsq_f32
     91 // CHECK: call float @llvm.amdgcn.rsq.f32
     92 void test_rsq_f32(global float* out, float a)
     93 {
     94   *out = __builtin_amdgcn_rsqf(a);
     95 }
     96 
     97 // CHECK-LABEL: @test_rsq_f64
     98 // CHECK: call double @llvm.amdgcn.rsq.f64
     99 void test_rsq_f64(global double* out, double a)
    100 {
    101   *out = __builtin_amdgcn_rsq(a);
    102 }
    103 
    104 // CHECK-LABEL: @test_rsq_clamp_f32
    105 // CHECK: call float @llvm.amdgcn.rsq.clamp.f32
    106 void test_rsq_clamp_f32(global float* out, float a)
    107 {
    108   *out = __builtin_amdgcn_rsq_clampf(a);
    109 }
    110 
    111 // CHECK-LABEL: @test_rsq_clamp_f64
    112 // CHECK: call double @llvm.amdgcn.rsq.clamp.f64
    113 void test_rsq_clamp_f64(global double* out, double a)
    114 {
    115   *out = __builtin_amdgcn_rsq_clamp(a);
    116 }
    117 
    118 // CHECK-LABEL: @test_sin_f32
    119 // CHECK: call float @llvm.amdgcn.sin.f32
    120 void test_sin_f32(global float* out, float a)
    121 {
    122   *out = __builtin_amdgcn_sinf(a);
    123 }
    124 
    125 // CHECK-LABEL: @test_cos_f32
    126 // CHECK: call float @llvm.amdgcn.cos.f32
    127 void test_cos_f32(global float* out, float a)
    128 {
    129   *out = __builtin_amdgcn_cosf(a);
    130 }
    131 
    132 // CHECK-LABEL: @test_log_clamp_f32
    133 // CHECK: call float @llvm.amdgcn.log.clamp.f32
    134 void test_log_clamp_f32(global float* out, float a)
    135 {
    136   *out = __builtin_amdgcn_log_clampf(a);
    137 }
    138 
    139 // CHECK-LABEL: @test_ldexp_f32
    140 // CHECK: call float @llvm.amdgcn.ldexp.f32
    141 void test_ldexp_f32(global float* out, float a, int b)
    142 {
    143   *out = __builtin_amdgcn_ldexpf(a, b);
    144 }
    145 
    146 // CHECK-LABEL: @test_ldexp_f64
    147 // CHECK: call double @llvm.amdgcn.ldexp.f64
    148 void test_ldexp_f64(global double* out, double a, int b)
    149 {
    150   *out = __builtin_amdgcn_ldexp(a, b);
    151 }
    152 
    153 // CHECK-LABEL: @test_frexp_mant_f32
    154 // CHECK: call float @llvm.amdgcn.frexp.mant.f32
    155 void test_frexp_mant_f32(global float* out, float a)
    156 {
    157   *out = __builtin_amdgcn_frexp_mantf(a);
    158 }
    159 
    160 // CHECK-LABEL: @test_frexp_mant_f64
    161 // CHECK: call double @llvm.amdgcn.frexp.mant.f64
    162 void test_frexp_mant_f64(global double* out, double a)
    163 {
    164   *out = __builtin_amdgcn_frexp_mant(a);
    165 }
    166 
    167 // CHECK-LABEL: @test_frexp_exp_f32
    168 // CHECK: call i32 @llvm.amdgcn.frexp.exp.f32
    169 void test_frexp_exp_f32(global int* out, float a)
    170 {
    171   *out = __builtin_amdgcn_frexp_expf(a);
    172 }
    173 
    174 // CHECK-LABEL: @test_frexp_exp_f64
    175 // CHECK: call i32 @llvm.amdgcn.frexp.exp.f64
    176 void test_frexp_exp_f64(global int* out, double a)
    177 {
    178   *out = __builtin_amdgcn_frexp_exp(a);
    179 }
    180 
    181 // CHECK-LABEL: @test_fract_f32
    182 // CHECK: call float @llvm.amdgcn.fract.f32
    183 void test_fract_f32(global int* out, float a)
    184 {
    185   *out = __builtin_amdgcn_fractf(a);
    186 }
    187 
    188 // CHECK-LABEL: @test_fract_f64
    189 // CHECK: call double @llvm.amdgcn.fract.f64
    190 void test_fract_f64(global int* out, double a)
    191 {
    192   *out = __builtin_amdgcn_fract(a);
    193 }
    194 
    195 // CHECK-LABEL: @test_class_f32
    196 // CHECK: call i1 @llvm.amdgcn.class.f32
    197 void test_class_f32(global float* out, float a, int b)
    198 {
    199   *out = __builtin_amdgcn_classf(a, b);
    200 }
    201 
    202 // CHECK-LABEL: @test_class_f64
    203 // CHECK: call i1 @llvm.amdgcn.class.f64
    204 void test_class_f64(global double* out, double a, int b)
    205 {
    206   *out = __builtin_amdgcn_class(a, b);
    207 }
    208 
    209 // CHECK-LABEL: @test_s_barrier
    210 // CHECK: call void @llvm.amdgcn.s.barrier(
    211 void test_s_barrier()
    212 {
    213   __builtin_amdgcn_s_barrier();
    214 }
    215 
    216 // CHECK-LABEL: @test_s_memtime
    217 // CHECK: call i64 @llvm.amdgcn.s.memtime()
    218 void test_s_memtime(global ulong* out)
    219 {
    220   *out = __builtin_amdgcn_s_memtime();
    221 }
    222 
    223 // CHECK-LABEL: @test_s_sleep
    224 // CHECK: call void @llvm.amdgcn.s.sleep(i32 1)
    225 // CHECK: call void @llvm.amdgcn.s.sleep(i32 15)
    226 void test_s_sleep()
    227 {
    228   __builtin_amdgcn_s_sleep(1);
    229   __builtin_amdgcn_s_sleep(15);
    230 }
    231 
    232 // CHECK-LABEL: @test_cubeid(
    233 // CHECK: call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
    234 void test_cubeid(global float* out, float a, float b, float c) {
    235   *out = __builtin_amdgcn_cubeid(a, b, c);
    236 }
    237 
    238 // CHECK-LABEL: @test_cubesc(
    239 // CHECK: call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
    240 void test_cubesc(global float* out, float a, float b, float c) {
    241   *out = __builtin_amdgcn_cubesc(a, b, c);
    242 }
    243 
    244 // CHECK-LABEL: @test_cubetc(
    245 // CHECK: call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
    246 void test_cubetc(global float* out, float a, float b, float c) {
    247   *out = __builtin_amdgcn_cubetc(a, b, c);
    248 }
    249 
    250 // CHECK-LABEL: @test_cubema(
    251 // CHECK: call float @llvm.amdgcn.cubema(float %a, float %b, float %c)
    252 void test_cubema(global float* out, float a, float b, float c) {
    253   *out = __builtin_amdgcn_cubema(a, b, c);
    254 }
    255 
    256 // CHECK-LABEL: @test_read_exec(
    257 // CHECK: call i64 @llvm.read_register.i64(metadata ![[EXEC:[0-9]+]]) #[[READ_EXEC_ATTRS:[0-9]+]]
    258 void test_read_exec(global ulong* out) {
    259   *out = __builtin_amdgcn_read_exec();
    260 }
    261 
    262 // CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]]
    263 
    264 // Legacy intrinsics with AMDGPU prefix
    265 
    266 // CHECK-LABEL: @test_legacy_rsq_f32
    267 // CHECK: call float @llvm.amdgcn.rsq.f32
    268 void test_legacy_rsq_f32(global float* out, float a)
    269 {
    270   *out = __builtin_amdgpu_rsqf(a);
    271 }
    272 
    273 // CHECK-LABEL: @test_legacy_rsq_f64
    274 // CHECK: call double @llvm.amdgcn.rsq.f64
    275 void test_legacy_rsq_f64(global double* out, double a)
    276 {
    277   *out = __builtin_amdgpu_rsq(a);
    278 }
    279 
    280 // CHECK-LABEL: @test_legacy_ldexp_f32
    281 // CHECK: call float @llvm.amdgcn.ldexp.f32
    282 void test_legacy_ldexp_f32(global float* out, float a, int b)
    283 {
    284   *out = __builtin_amdgpu_ldexpf(a, b);
    285 }
    286 
    287 // CHECK-LABEL: @test_legacy_ldexp_f64
    288 // CHECK: call double @llvm.amdgcn.ldexp.f64
    289 void test_legacy_ldexp_f64(global double* out, double a, int b)
    290 {
    291   *out = __builtin_amdgpu_ldexp(a, b);
    292 }
    293 
    294 // CHECK-LABEL: @test_kernarg_segment_ptr
    295 // CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
    296 void test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out)
    297 {
    298   *out = __builtin_amdgcn_kernarg_segment_ptr();
    299 }
    300 
    301 // CHECK-LABEL: @test_implicitarg_ptr
    302 // CHECK: call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
    303 void test_implicitarg_ptr(__attribute__((address_space(2))) unsigned char ** out)
    304 {
    305   *out = __builtin_amdgcn_implicitarg_ptr();
    306 }
    307 
    308 // CHECK-LABEL: @test_get_group_id(
    309 // CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x()
    310 // CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y()
    311 // CHECK: tail call i32 @llvm.amdgcn.workgroup.id.z()
    312 void test_get_group_id(int d, global int *out)
    313 {
    314 	switch (d) {
    315 	case 0: *out = __builtin_amdgcn_workgroup_id_x(); break;
    316 	case 1: *out = __builtin_amdgcn_workgroup_id_y(); break;
    317 	case 2: *out = __builtin_amdgcn_workgroup_id_z(); break;
    318 	default: *out = 0;
    319 	}
    320 }
    321 
    322 // CHECK-LABEL: @test_get_local_id(
    323 // CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[WI_RANGE:![0-9]*]]
    324 // CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[WI_RANGE]]
    325 // CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[WI_RANGE]]
    326 void test_get_local_id(int d, global int *out)
    327 {
    328 	switch (d) {
    329 	case 0: *out = __builtin_amdgcn_workitem_id_x(); break;
    330 	case 1: *out = __builtin_amdgcn_workitem_id_y(); break;
    331 	case 2: *out = __builtin_amdgcn_workitem_id_z(); break;
    332 	default: *out = 0;
    333 	}
    334 }
    335 
    336 // CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024}
    337 // CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
    338 // CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent }
    339 // CHECK-DAG: ![[EXEC]] = !{!"exec"}
    340