Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
      2 
      3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
      4 
      5 declare i16 @llvm.ctlz.i16(i16, i1) readnone
      6 declare i32 @llvm.ctlz.i32(i32, i1) readnone
      7 declare i64 @llvm.ctlz.i64(i64, i1) readnone
      8 
; There should be no difference between llvm.ctlz.i32(%a, true) and
; llvm.ctlz.i32(%a, false), as PTX's clz.b32 of 0 is defined to return 32,
; which is exactly the result llvm.ctlz.i32 requires for a zero input.
     11 
     12 ; CHECK-LABEL: myctlz(
     13 define i32 @myctlz(i32 %a) {
     14 ; CHECK: ld.param.
     15 ; CHECK-NEXT: clz.b32
     16 ; CHECK-NEXT: st.param.
     17 ; CHECK-NEXT: ret;
     18   %val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
     19   ret i32 %val
     20 }
     21 ; CHECK-LABEL: myctlz_2(
     22 define i32 @myctlz_2(i32 %a) {
     23 ; CHECK: ld.param.
     24 ; CHECK-NEXT: clz.b32
     25 ; CHECK-NEXT: st.param.
     26 ; CHECK-NEXT: ret;
     27   %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
     28   ret i32 %val
     29 }
     30 
     31 ; PTX's clz.b64 returns a 32-bit value, but LLVM's intrinsic returns a 64-bit
     32 ; value, so here we have to zero-extend it.
     33 ; CHECK-LABEL: myctlz64(
     34 define i64 @myctlz64(i64 %a) {
     35 ; CHECK: ld.param.
     36 ; CHECK-NEXT: clz.b64
     37 ; CHECK-NEXT: cvt.u64.u32
     38 ; CHECK-NEXT: st.param.
     39 ; CHECK-NEXT: ret;
     40   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
     41   ret i64 %val
     42 }
     43 ; CHECK-LABEL: myctlz64_2(
     44 define i64 @myctlz64_2(i64 %a) {
     45 ; CHECK: ld.param.
     46 ; CHECK-NEXT: clz.b64
     47 ; CHECK-NEXT: cvt.u64.u32
     48 ; CHECK-NEXT: st.param.
     49 ; CHECK-NEXT: ret;
     50   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
     51   ret i64 %val
     52 }
     53 
     54 ; Here we truncate the 64-bit value of LLVM's ctlz intrinsic to 32 bits, the
     55 ; natural return width of ptx's clz.b64 instruction.  No conversions should be
     56 ; necessary in the PTX.
     57 ; CHECK-LABEL: myctlz64_as_32(
     58 define i32 @myctlz64_as_32(i64 %a) {
     59 ; CHECK: ld.param.
     60 ; CHECK-NEXT: clz.b64
     61 ; CHECK-NEXT: st.param.
     62 ; CHECK-NEXT: ret;
     63   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
     64   %trunc = trunc i64 %val to i32
     65   ret i32 %trunc
     66 }
     67 ; CHECK-LABEL: myctlz64_as_32_2(
     68 define i32 @myctlz64_as_32_2(i64 %a) {
     69 ; CHECK: ld.param.
     70 ; CHECK-NEXT: clz.b64
     71 ; CHECK-NEXT: st.param.
     72 ; CHECK-NEXT: ret;
     73   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
     74   %trunc = trunc i64 %val to i32
     75   ret i32 %trunc
     76 }
     77 
; ctlz.i16 is implemented by extending the input to i32, computing the 32-bit
; result, subtracting 16 to discount the leading zeros introduced by the
; extension, and then truncating the result back down to i16.  But the NVPTX
; ABI zero-extends i16 return values to i32, so the final truncation doesn't
; appear in this function.
     82 ; CHECK-LABEL: myctlz_ret16(
     83 define i16 @myctlz_ret16(i16 %a) {
     84 ; CHECK: ld.param.
     85 ; CHECK-NEXT: cvt.u32.u16
     86 ; CHECK-NEXT: clz.b32
     87 ; CHECK-NEXT: sub.
     88 ; CHECK-NEXT: st.param.
     89 ; CHECK-NEXT: ret;
     90   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
     91   ret i16 %val
     92 }
     93 ; CHECK-LABEL: myctlz_ret16_2(
     94 define i16 @myctlz_ret16_2(i16 %a) {
     95 ; CHECK: ld.param.
     96 ; CHECK-NEXT: cvt.u32.u16
     97 ; CHECK-NEXT: clz.b32
     98 ; CHECK-NEXT: sub.
     99 ; CHECK-NEXT: st.param.
    100 ; CHECK-NEXT: ret;
    101   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
    102   ret i16 %val
    103 }
    104 
; Here we store the result of ctlz.i16 through an i16 pointer, so the trunc
; should remain.
    107 ; CHECK-LABEL: myctlz_store16(
    108 define void @myctlz_store16(i16 %a, i16* %b) {
    109 ; CHECK: ld.param.
    110 ; CHECK-NEXT: cvt.u32.u16
    111 ; CHECK-NEXT: clz.b32
    112 ; CHECK-DAG: cvt.u16.u32
    113 ; CHECK-DAG: sub.
    114 ; CHECK: st.{{[a-z]}}16
    115 ; CHECK: ret;
    116   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
    117   store i16 %val, i16* %b
    118   ret void
    119 }
    120 ; CHECK-LABEL: myctlz_store16_2(
    121 define void @myctlz_store16_2(i16 %a, i16* %b) {
    122 ; CHECK: ld.param.
    123 ; CHECK-NEXT: cvt.u32.u16
    124 ; CHECK-NEXT: clz.b32
    125 ; CHECK-DAG: cvt.u16.u32
    126 ; CHECK-DAG: sub.
    127 ; CHECK: st.{{[a-z]}}16
    128 ; CHECK: ret;
    129   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
    130   store i16 %val, i16* %b
    131   ret void
    132 }
    133