Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
      2 
      3 ; Test that loops with different maximum offsets for different address
      4 ; spaces are correctly handled.
      5 
      6 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
        ; Pointer widths declared by the layout string above: the default address
        ; space is 32-bit (p:32:32), addrspace(1) is 64-bit (p1:64:64) and
        ; addrspace(3) is 32-bit (p3:32:32). This is consistent with the tests in
        ; this file indexing addrspace(1) GEPs with i64 and addrspace(3) GEPs with i32.
      7 
      8 ; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32(
      9 ; OPT: {{^}}.lr.ph:
     10 ; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
     11 ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095
     12 ; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1
        ;
        ; addrspace(1) case with a constant byte offset of 4095.
        ; Each iteration loads the i8 at %arg1[iv + 4095], sign-extends it to i32
        ; and adds it into the i32 stored at %arg0[iv].
        ; The OPT lines above expect loop-reduce to turn %arg1 into a pointer
        ; induction variable (%lsr.iv2, starting at the unmodified %arg1) and to
        ; keep 4095 as a constant GEP offset inside the loop body.
        ; NOTE(review): the "max_offset" name suggests 4095 is the largest offset
        ; the target folds for this address space -- confirm against the backend.
     13 define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
     14 bb:
        ; Skip the loop entirely when %n <= 0.
     15   %tmp = icmp sgt i32 %n, 0
     16   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
     17 
     18 .lr.ph.preheader:                                 ; preds = %bb
     19   br label %.lr.ph
     20 
     21 ._crit_edge.loopexit:                             ; preds = %.lr.ph
     22   br label %._crit_edge
     23 
     24 ._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
     25   ret void
     26 
     27 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
     28   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
        ; Byte index into %arg1: induction variable plus the constant under test.
     29   %tmp1 = add nuw nsw i64 %indvars.iv, 4095
     30   %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
     31   %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
     32   %tmp4 = sext i8 %tmp3 to i32
        ; Read-modify-write of %arg0[iv] with the sign-extended byte.
     33   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
     34   %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
     35   %tmp7 = add nsw i32 %tmp6, %tmp4
     36   store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
        ; Exit once the low 32 bits of the incremented counter equal %n.
     37   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     38   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     39   %exitcond = icmp eq i32 %lftr.wideiv, %n
     40   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
     41 }
     42 
     43 ; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32(
     44 ; OPT: {{^}}.lr.ph.preheader:
     45 ; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096
     46 ; OPT: br label %.lr.ph
     47 
     48 ; OPT: {{^}}.lr.ph:
     49 ; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
     50 ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1
        ;
        ; addrspace(1) case with a constant byte offset of 4096, i.e. one more than
        ; in @test_global_addressing_loop_uniform_index_max_offset_i32.
        ; The loop body is otherwise the same: load the i8 at %arg1[iv + 4096],
        ; sign-extend it and add it into the i32 at %arg0[iv].
        ; The OPT lines above expect the 4096 to be added to %arg1 once in the
        ; preheader (%scevgep2), with the pointer induction variable %lsr.iv3
        ; starting from that value and advancing by 1 byte per iteration.
        ; NOTE(review): presumably 4096 is just past the foldable offset range for
        ; this address space -- confirm against the backend.
     51 define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
     52 bb:
        ; Skip the loop entirely when %n <= 0.
     53   %tmp = icmp sgt i32 %n, 0
     54   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
     55 
     56 .lr.ph.preheader:                                 ; preds = %bb
     57   br label %.lr.ph
     58 
     59 ._crit_edge.loopexit:                             ; preds = %.lr.ph
     60   br label %._crit_edge
     61 
     62 ._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
     63   ret void
     64 
     65 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
     66   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
        ; Byte index into %arg1: induction variable plus the constant under test.
     67   %tmp1 = add nuw nsw i64 %indvars.iv, 4096
     68   %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
     69   %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
     70   %tmp4 = sext i8 %tmp3 to i32
        ; Read-modify-write of %arg0[iv] with the sign-extended byte.
     71   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
     72   %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
     73   %tmp7 = add nsw i32 %tmp6, %tmp4
     74   store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
        ; Exit once the low 32 bits of the incremented counter equal %n.
     75   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     76   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     77   %exitcond = icmp eq i32 %lftr.wideiv, %n
     78   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
     79 }
     80 
     81 ; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32(
     82 ; OPT: {{^}}.lr.ph:
     83 ; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
     84 ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535
     85 ; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1
        ;
        ; addrspace(3) case with a constant byte offset of 65535.
        ; Each iteration loads the i8 at %arg1[trunc(iv + 65535)] from addrspace(3),
        ; sign-extends it to i32 and adds it into the i32 at %arg0[iv] in
        ; addrspace(1).
        ; The OPT lines above expect loop-reduce to turn %arg1 into a pointer
        ; induction variable (%lsr.iv2, starting at the unmodified %arg1) and to
        ; keep 65535 as a constant i32 GEP offset inside the loop body.
        ; The block-label check ends in ':' so that it matches only the '.lr.ph:'
        ; label and cannot be satisfied by '.lr.ph.preheader:'.
        ; NOTE(review): the "max_offset" name suggests 65535 is the largest offset
        ; the target folds for this address space -- confirm against the backend.
     86 define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
     87 bb:
        ; Skip the loop entirely when %n <= 0.
     88   %tmp = icmp sgt i32 %n, 0
     89   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
     90 
     91 .lr.ph.preheader:                                 ; preds = %bb
     92   br label %.lr.ph
     93 
     94 ._crit_edge.loopexit:                             ; preds = %.lr.ph
     95   br label %._crit_edge
     96 
     97 ._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
     98   ret void
     99 
    100 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
    101   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
        ; The index is formed in i64 and truncated to i32 before the
        ; addrspace(3) GEP, which takes an i32 index.
    102   %tmp1 = add nuw nsw i64 %indvars.iv, 65535
    103   %tmp2 = trunc i64 %tmp1 to i32
    104   %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
    105   %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
    106   %tmp5 = sext i8 %tmp4 to i32
        ; Read-modify-write of %arg0[iv] with the sign-extended byte.
    107   %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
    108   %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
    109   %tmp8 = add nsw i32 %tmp7, %tmp5
    110   store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
        ; Exit once the low 32 bits of the incremented counter equal %n.
    111   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    112   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    113   %exitcond = icmp eq i32 %lftr.wideiv, %n
    114   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
    115 }
    116 
    117 ; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32(
    118 ; OPT: {{^}}.lr.ph.preheader:
    119 ; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536
    120 ; OPT: br label %.lr.ph
    121 
    122 ; OPT: {{^}}.lr.ph:
    123 ; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
    124 ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1
        ;
        ; addrspace(3) case with a constant byte offset of 65536, i.e. one more than
        ; in @test_local_addressing_loop_uniform_index_max_offset_i32.
        ; The loop body is otherwise the same: load the i8 at
        ; %arg1[trunc(iv + 65536)] from addrspace(3), sign-extend it and add it into
        ; the i32 at %arg0[iv] in addrspace(1).
        ; The OPT lines above expect the 65536 to be added to %arg1 once in the
        ; preheader (%scevgep2), with the pointer induction variable %lsr.iv3
        ; starting from that value and advancing by 1 byte per iteration.
        ; NOTE(review): presumably 65536 is just past the foldable offset range for
        ; this address space -- confirm against the backend.
    125 define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
    126 bb:
        ; Skip the loop entirely when %n <= 0.
    127   %tmp = icmp sgt i32 %n, 0
    128   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
    129 
    130 .lr.ph.preheader:                                 ; preds = %bb
    131   br label %.lr.ph
    132 
    133 ._crit_edge.loopexit:                             ; preds = %.lr.ph
    134   br label %._crit_edge
    135 
    136 ._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
    137   ret void
    138 
    139 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
    140   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
        ; The index is formed in i64 and truncated to i32 before the
        ; addrspace(3) GEP, which takes an i32 index.
    141   %tmp1 = add nuw nsw i64 %indvars.iv, 65536
    142   %tmp2 = trunc i64 %tmp1 to i32
    143   %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
    144   %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
    145   %tmp5 = sext i8 %tmp4 to i32
        ; Read-modify-write of %arg0[iv] with the sign-extended byte.
    146   %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
    147   %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
    148   %tmp8 = add nsw i32 %tmp7, %tmp5
    149   store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
        ; Exit once the low 32 bits of the incremented counter equal %n.
    150   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    151   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    152   %exitcond = icmp eq i32 %lftr.wideiv, %n
    153   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
    154 }
    155 
    156 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" }
        ; Attribute group #0 above is attached to every function in this file.
        ; NOTE(review): it sets "target-cpu"="hawaii" while the RUN line passes
        ; -mcpu=bonaire -- confirm which CPU the test is meant to exercise.
    157