Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
      2 
      3 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
      4 ; In the layout above, default-address-space pointers are 32-bit (p:32:32) and addrspace(1) pointers are 64-bit (p1:64:64); the tests below address addrspace(1) memory with i64 indices extended from i32 values.
      5 declare i32 @llvm.amdgcn.workitem.id.x() #1 ; i32-returning AMDGPU intrinsic; its result is the base element index in the first four tests. Attribute group #1 is nounwind readnone.
      6 
      7 ; CHECK-LABEL: @basic_merge_sext_index(
      8 ; CHECK: sext i32 %id.x to i64
      9 ; CHECK: load <2 x float>
     10 ; CHECK: store <2 x float> zeroinitializer
     11 define void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
     12 entry: ; Two adjacent float loads from %c and two adjacent float stores to %a, indexed by a sign-extended id; the expectations above require each pair to come out as a single <2 x float> access.
     13   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
     14   %sext.id.x = sext i32 %id.x to i64 ; the i32 id is sign-extended once and shared by both base addresses
     15   %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x ; &a[id]
     16   %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x ; &c[id]
     17   %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1 ; &a[id + 1], formed as a constant offset of one element from %a.idx.x
     18   %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1 ; &c[id + 1], formed the same way from %c.idx.x
     19   ; Both loads are issued first, then both stores; none of the pointer arguments is marked noalias in this test.
     20   %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
     21   %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
     22 
     23   store float 0.0, float addrspace(1)* %a.idx.x, align 4
     24   store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
     25   ; The sum of the two loaded values is written to %b, so both loads have a use.
     26   %add = fadd float %ld.c, %ld.c.idx.1
     27   store float %add, float addrspace(1)* %b, align 4
     28   ret void
     29 }
     30 
     31 ; CHECK-LABEL: @basic_merge_zext_index(
     32 ; CHECK: zext i32 %id.x to i64
     33 ; CHECK: load <2 x float>
     34 ; CHECK: store <2 x float>
     35 define void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
     36 entry: ; Two adjacent float loads from %c and two adjacent float stores to %a, indexed by a zero-extended id; the expectations above require a <2 x float> load and a <2 x float> store.
     37   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
     38   %zext.id.x = zext i32 %id.x to i64 ; the i32 id is zero-extended once and shared by both base addresses
     39   %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x ; &a[id]
     40   %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x ; &c[id]
     41   %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1 ; &a[id + 1], formed as a constant offset of one element from %a.idx.x
     42   %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1 ; &c[id + 1], formed the same way from %c.idx.x
     43   ; Both loads are issued first, then both stores; none of the pointer arguments is marked noalias in this test.
     44   %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
     45   %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
     46   store float 0.0, float addrspace(1)* %a.idx.x, align 4
     47   store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
     48   ; The sum of the two loaded values is written to %b, so both loads have a use.
     49   %add = fadd float %ld.c, %ld.c.idx.1
     50   store float %add, float addrspace(1)* %b, align 4
     51   ret void
     52 }
     53 
     54 ; CHECK-LABEL: @merge_op_zext_index(
     55 ; CHECK: load <2 x float>
     56 ; CHECK: store <2 x float>
     57 define void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
     58 entry: ; The second element's index is computed with `or` on the i32 value and zero-extended separately, instead of being a GEP offset of 1 from the first address; the expectations above still require <2 x float> accesses.
     59   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
     60   %shl = shl i32 %id.x, 2 ; shifting left by 2 leaves the low two bits of %shl zero
     61   %zext.id.x = zext i32 %shl to i64
     62   %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x ; &a[shl]
     63   %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x ; &c[shl]
     64   ; Bit 0 of %shl is clear, so `or 1` produces %shl + 1; this index gets its own zext rather than reusing %zext.id.x.
     65   %id.x.1 = or i32 %shl, 1
     66   %id.x.1.ext = zext i32 %id.x.1 to i64
     67 
     68   %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext ; &a[shl | 1]
     69   %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext ; &c[shl | 1]
     70   ; Loads and stores are interleaved (load, store, load, store); all three pointer arguments are noalias. NOTE(review): presumably noalias is what allows the pairs to be merged across the intervening access -- confirm.
     71   %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
     72   store float 0.0, float addrspace(1)* %a.0, align 4
     73   %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
     74   store float 0.0, float addrspace(1)* %a.1, align 4
     75   ; The sum of the two loaded values is written to %b, so both loads have a use.
     76   %add = fadd float %ld.c.0, %ld.c.1
     77   store float %add, float addrspace(1)* %b, align 4
     78   ret void
     79 }
     80 
     81 ; CHECK-LABEL: @merge_op_sext_index(
     82 ; CHECK: load <2 x float>
     83 ; CHECK: store <2 x float>
     84 define void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
     85 entry: ; The second element's index is computed with `or` on the i32 value and sign-extended separately, instead of being a GEP offset of 1 from the first address; the expectations above still require <2 x float> accesses.
     86   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
     87   %shl = shl i32 %id.x, 2 ; shifting left by 2 leaves the low two bits of %shl zero
     88   %sext.id.x = sext i32 %shl to i64 ; named for the sext that actually produces it
     89   %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x ; &a[shl]
     90   %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x ; &c[shl]
     91   ; Bit 0 of %shl is clear, so `or 1` produces %shl + 1; this index gets its own sext rather than reusing %sext.id.x.
     92   %id.x.1 = or i32 %shl, 1
     93   %id.x.1.ext = sext i32 %id.x.1 to i64
     94 
     95   %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext ; &a[shl | 1]
     96   %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext ; &c[shl | 1]
     97   ; Loads and stores are interleaved (load, store, load, store); all three pointer arguments are noalias. NOTE(review): presumably noalias is what allows the pairs to be merged across the intervening access -- confirm.
     98   %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
     99   store float 0.0, float addrspace(1)* %a.0, align 4
    100   %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
    101   store float 0.0, float addrspace(1)* %a.1, align 4
    102   ; The sum of the two loaded values is written to %b, so both loads have a use.
    103   %add = fadd float %ld.c.0, %ld.c.1
    104   store float %add, float addrspace(1)* %b, align 4
    105   ret void
    106 }
    107 
    108 ; This case fails to vectorize if not using the extra extension
    109 ; handling in isConsecutiveAccess.
    110 
    111 ; CHECK-LABEL: @zext_trunc_phi_1(
    112 ; CHECK: loop:
    113 ; CHECK: load <2 x i32>
    114 ; CHECK: store <2 x i32>
    115 define void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 {
    116 entry: ; Loop copying two neighbouring i32 elements from %c to %a per iteration, with indices derived from a truncated i64 induction variable; the expectations above require <2 x i32> accesses inside the loop block. %b, %arst and %aoeu are not referenced in the body.
    117   %cmp0 = icmp eq i32 %n, 0 ; skip the loop entirely when %n is 0
    118   br i1 %cmp0, label %exit, label %loop
    119 
    120 loop:
    121   %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ] ; i64 induction variable, 0 on entry
    122   %trunc.iv = trunc i64 %indvars.iv to i32
    123   %idx = shl i32 %trunc.iv, 4 ; shifting left by 4 leaves the low four bits of %idx zero
    124   ; First element: the i32 index is zero-extended back to i64 for addressing.
    125   %idx.ext = zext i32 %idx to i64
    126   %c.0 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.ext ; &c[idx]
    127   %a.0 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext ; &a[idx]
    128   ; Second element: bit 0 of %idx is clear, so `or 1` produces %idx + 1; it is zero-extended separately.
    129   %idx.1 = or i32 %idx, 1
    130   %idx.1.ext = zext i32 %idx.1 to i64
    131   %c.1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.1.ext ; &c[idx | 1]
    132   %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.1.ext ; &a[idx | 1]
    133   ; Loads and stores are interleaved (load, store, load, store); %a and %c are noalias.
    134   %ld.c.0 = load i32, i32 addrspace(1)* %c.0, align 4
    135   store i32 %ld.c.0, i32 addrspace(1)* %a.0, align 4
    136   %ld.c.1 = load i32, i32 addrspace(1)* %c.1, align 4
    137   store i32 %ld.c.1, i32 addrspace(1)* %a.1, align 4
    138 
    139   %indvars.iv.next = add i64 %indvars.iv, 1 ; advance the induction variable by one per iteration
    140   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    141 
    142   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once the truncated next value equals %n
    143   br i1 %exitcond, label %exit, label %loop
    144 
    145 exit:
    146   ret void
    147 }
    148 
    149 attributes #0 = { nounwind } ; attribute group used by the test functions defined above
    150 attributes #1 = { nounwind readnone } ; attribute group used by the @llvm.amdgcn.workitem.id.x declaration
    151