Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc -O2 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O2 --check-prefix=CHECK
      2 ; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O0 --check-prefix=CHECK
      3 
      4 ; The following IR
      5 ;
      6 ;   quot = n / d
      7 ;   rem  = n % d
      8 ;
      9 ; should be transformed into
     10 ;
     11 ;   quot = n / d
     12 ;   rem = n - (n / d) * d
     13 ;
     14 ; during NVPTX isel, at -O2.  At -O0, we should leave it alone.
     15 
     16 ; CHECK-LABEL: sdiv32(
        ; Signed case: sdiv and srem use the same operands (%n, %d). At -O2 the
        ; remainder is expected to be rebuilt from the quotient (mul.lo + sub);
        ; at -O0 a separate rem.s32 is expected instead.
     17 define void @sdiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
          ; The division is expected at both optimization levels. Its result and
          ; operand registers are captured for the directives that follow.
     18   ; CHECK: div.s32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
     19   %quot = sdiv i32 %n, %d
     20 
     21   ; O0: rem.s32
     22   ; (The mul pattern below is unfortunately order-sensitive, even though mul
          ; is commutative: it only matches the operands written as quot, den.)
     23   ; O2: mul.lo.s32 [[mul:%r[0-9]+]], [[quot]], [[den]];
     24   ; O2: sub.s32 [[rem:%r[0-9]+]], [[num]], [[mul]]
     25   %rem = srem i32 %n, %d
     26 
          ; At -O2, also confirm that the values stored are the captured quotient
          ; and remainder registers.
     27   ; O2: st{{.*}}[[quot]]
     28   store i32 %quot, i32* %quot_ret
     29   ; O2: st{{.*}}[[rem]]
     30   store i32 %rem, i32* %rem_ret
     31   ret void
     32 }
     33 
     34 ; CHECK-LABEL: udiv32(
        ; Unsigned case: udiv and urem use the same operands (%n, %d). At -O2 the
        ; remainder is expected to be rebuilt from the quotient (mul.lo + sub);
        ; at -O0 a separate rem.u32 is expected instead.
     35 define void @udiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
          ; The division is expected at both optimization levels. Its result and
          ; operand registers are captured for the directives that follow.
     36   ; CHECK: div.u32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
     37   %quot = udiv i32 %n, %d
     38 
     39   ; O0: rem.u32
     40 
     41   ; Selection DAG doesn't know whether this is signed or unsigned
     42   ; multiplication and subtraction, but it doesn't make a difference either
     43   ; way, so the patterns below accept both the .u32 and the .s32 spelling.
     44   ; O2: mul.lo.{{u|s}}32 [[mul:%r[0-9]+]], [[quot]], [[den]];
     45   ; O2: sub.{{u|s}}32 [[rem:%r[0-9]+]], [[num]], [[mul]]
     46   %rem = urem i32 %n, %d
     47 
          ; At -O2, also confirm that the values stored are the captured quotient
          ; and remainder registers.
     48   ; O2: st{{.*}}[[quot]]
     49   store i32 %quot, i32* %quot_ret
     50   ; O2: st{{.*}}[[rem]]
     51   store i32 %rem, i32* %rem_ret
     52   ret void
     53 }
     54 
     55 ; Check that we don't perform this optimization if one operation is signed and
     56 ; the other isn't.
     57 ; CHECK-LABEL: mismatched_types1(
        ; Unsigned division paired with a signed remainder of the same operands.
     58 define void @mismatched_types1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
          ; These use the shared check prefix, so a real rem instruction is
          ; required from both llc invocations (-O2 and -O0).
     59   ; CHECK: div.u32
     60   ; CHECK: rem.s32
     61   %quot = udiv i32 %n, %d
     62   %rem = srem i32 %n, %d
     63   store i32 %quot, i32* %quot_ret
     64   store i32 %rem, i32* %rem_ret
     65   ret void
     66 }
     67 
     68 ; CHECK-LABEL: mismatched_types2(
        ; Signed division paired with an unsigned remainder of the same operands;
        ; the signedness differs, so the div/rem pair must not be combined.
     69 define void @mismatched_types2(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
          ; These use the shared check prefix, so a real rem instruction is
          ; required from both llc invocations (-O2 and -O0).
     70   ; CHECK: div.s32
     71   ; CHECK: rem.u32
     72   %quot = sdiv i32 %n, %d
     73   %rem = urem i32 %n, %d
     74   store i32 %quot, i32* %quot_ret
     75   store i32 %rem, i32* %rem_ret
     76   ret void
     77 }
     78 
     79 ; Check that we don't perform this optimization if the inputs to the div don't
     80 ; match the inputs to the rem.
     81 ; CHECK-LABEL: mismatched_inputs1(
        ; Same two values, but the rem takes them in the opposite order (%d, %n).
     82 define void @mismatched_inputs1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
          ; These use the shared check prefix, so a real rem instruction is
          ; required from both llc invocations (-O2 and -O0).
     83   ; CHECK: div.s32
     84   ; CHECK: rem.s32
     85   %quot = sdiv i32 %n, %d
     86   %rem = srem i32 %d, %n
     87   store i32 %quot, i32* %quot_ret
     88   store i32 %rem, i32* %rem_ret
     89   ret void
     90 }
     91 
     92 ; CHECK-LABEL: mismatched_inputs2(
        ; The div and the rem share the denominator %d but use different
        ; numerators (%n1 vs. %n2), so the div/rem pair must not be combined.
     93 define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, i32* %quot_ret, i32* %rem_ret) {
          ; These use the shared check prefix, so a real rem instruction is
          ; required from both llc invocations (-O2 and -O0).
     94   ; CHECK: div.s32
     95   ; CHECK: rem.s32
     96   %quot = sdiv i32 %n1, %d
     97   %rem = srem i32 %n2, %d
     98   store i32 %quot, i32* %quot_ret
     99   store i32 %rem, i32* %rem_ret
    100   ret void
    101 }
    102 
    103 ; CHECK-LABEL: mismatched_inputs3(
        ; The div and the rem share the numerator %n but use different
        ; denominators (%d1 vs. %d2), so the div/rem pair must not be combined.
    104 define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, i32* %quot_ret, i32* %rem_ret) {
          ; These use the shared check prefix, so a real rem instruction is
          ; required from both llc invocations (-O2 and -O0).
    105   ; CHECK: div.s32
    106   ; CHECK: rem.s32
    107   %quot = sdiv i32 %n, %d1
    108   %rem = srem i32 %n, %d2
    109   store i32 %quot, i32* %quot_ret
    110   store i32 %rem, i32* %rem_ret
    111   ret void
    112 }
    113