Home | History | Annotate | Download | only in Hexagon
      1 ; RUN: llc -march=hexagon -O0 < %s | FileCheck --check-prefix=CHECKO0 %s
      2 ; KP: Removed -O2 check. The code has become more aggressively optimized
      3 ; (some loads were found to be redundant and have been removed completely),
      4 ; and verifying correct code generation has become more difficult than
      5 ; it's worth. Only the CHECKO0 patterns below are exercised by the RUN line.
      6 
      7 ; CHECK: v{{[0-9]*}} = vsplat(r{{[0-9]*}})
      8 ; CHECK: v{{[0-9]*}} = vsplat(r{{[0-9]*}})
      9 
     10 ; CHECKO0: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
     11 ; CHECKO0: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
     12 ; CHECKO0: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
     13 
     14 ; Allow .cur loads.
     15 ; CHECKO2: v{{[0-9].*}} = vmem(r{{[0-9]*}}+#0)
     16 ; CHECKO2: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
     17 ; CHECKO2: v{{[0-9].*}} = vmem(r{{[0-9]*}}+#0)
     18 
     19 ; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vcombine(v{{[0-9]*}},v{{[0-9]*}})
     20 ; CHECK: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
     21 ; CHECK: vmem(r{{[0-9]*}}+#32) = v{{[0-9]*}}
     22 ; CHECK: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
     23 ; CHECK: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#32)
     24 ; CHECK: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
     25 ; CHECK: vmem(r{{[0-9]*}}+#32) = v{{[0-9]*}}
     26 
     27 target triple = "hexagon"
     28 
     29 @g0 = common global [10 x <32 x i32>] zeroinitializer, align 64 ; written by f2 (copy of g4), read lane-by-lane as i32 by f1
     30 @g1 = private unnamed_addr constant [11 x i8] c"c[%d]= %x\0A\00", align 8 ; format string passed to f0 from f1
     31 @g2 = common global [10 x <16 x i32>] zeroinitializer, align 64 ; f2 stores splat(i + 1) here
     32 @g3 = common global [10 x <16 x i32>] zeroinitializer, align 64 ; f2 stores splat(i * 10 + 1) here
     33 @g4 = common global [10 x <32 x i32>] zeroinitializer, align 64 ; f2 stores the vcombine of g2[i] and g3[i] here
     34 
     35 declare i32 @f0(i8*, ...) ; variadic; called with @g1 as its first argument, so presumably printf-like (not defined in this file)
     36 
     37 ; Function Attrs: nounwind -- f1(%a0): starting %a0 i32 elements into @g0, passes 16 consecutive i32 values to @f0
     38 define void @f1(i32 %a0) #0 {
     39 b0:
     40   %v0 = alloca i32, align 4 ; stack slot holding a copy of %a0
     41   %v1 = alloca i32*, align 4 ; stack slot holding the i32* read cursor
     42   %v2 = alloca i32, align 4 ; stack slot holding the loop counter
     43   store i32 %a0, i32* %v0, align 4
     44   store i32* getelementptr inbounds ([10 x <32 x i32>], [10 x <32 x i32>]* @g0, i32 0, i32 0, i32 0), i32** %v1, align 4 ; cursor = address of the first i32 lane of @g0
     45   %v3 = load i32, i32* %v0, align 4
     46   %v4 = load i32*, i32** %v1, align 4
     47   %v5 = getelementptr inbounds i32, i32* %v4, i32 %v3 ; advance the cursor by %a0 i32 elements
     48   store i32* %v5, i32** %v1, align 4
     49   store i32 0, i32* %v2, align 4 ; counter = 0
     50   br label %b1
     51 
     52 b1:                                               ; preds = %b3, %b0
     53   %v6 = load i32, i32* %v2, align 4
     54   %v7 = icmp slt i32 %v6, 16 ; keep looping while counter < 16
     55   br i1 %v7, label %b2, label %b4
     56 
     57 b2:                                               ; preds = %b1
     58   %v8 = load i32, i32* %v2, align 4
     59   %v9 = load i32*, i32** %v1, align 4
     60   %v10 = getelementptr inbounds i32, i32* %v9, i32 1 ; post-increment: the stored cursor moves one i32 forward
     61   store i32* %v10, i32** %v1, align 4
     62   %v11 = load i32, i32* %v9, align 4 ; read the value at the pre-increment cursor
     63   %v12 = call i32 (i8*, ...) @f0(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @g1, i32 0, i32 0), i32 %v8, i32 %v11) ; f0(@g1, counter, value); the result %v12 is unused
     64   br label %b3
     65 
     66 b3:                                               ; preds = %b2
     67   %v13 = load i32, i32* %v2, align 4
     68   %v14 = add nsw i32 %v13, 1 ; counter += 1
     69   store i32 %v14, i32* %v2, align 4
     70   br label %b1
     71 
     72 b4:                                               ; preds = %b1
     73   ret void
     74 }
     75 
     76 ; Function Attrs: nounwind -- f2(): three loops of 3 iterations each: fill g2/g3/g4, copy g4 to g0, then call f1; returns 0
     77 define i32 @f2() #0 {
     78 b0:
     79   %v0 = alloca i32, align 4 ; written once below and never read in this function
     80   %v1 = alloca i32, align 4 ; loop counter i, reused by all three loops
     81   store i32 0, i32* %v0
     82   store i32 0, i32* %v1, align 4 ; i = 0
     83   br label %b1
     84 
     85 b1:                                               ; preds = %b3, %b0
     86   %v2 = load i32, i32* %v1, align 4
     87   %v3 = icmp slt i32 %v2, 3 ; first loop runs while i < 3
     88   br i1 %v3, label %b2, label %b4
     89 
     90 b2:                                               ; preds = %b1
     91   %v4 = load i32, i32* %v1, align 4
     92   %v5 = add nsw i32 %v4, 1 ; i + 1
     93   %v6 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v5)
     94   %v7 = load i32, i32* %v1, align 4
     95   %v8 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g2, i32 0, i32 %v7
     96   store <16 x i32> %v6, <16 x i32>* %v8, align 64 ; g2[i] = lvsplatw(i + 1)
     97   %v9 = load i32, i32* %v1, align 4
     98   %v10 = mul nsw i32 %v9, 10
     99   %v11 = add nsw i32 %v10, 1 ; i * 10 + 1
    100   %v12 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v11)
    101   %v13 = load i32, i32* %v1, align 4
    102   %v14 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g3, i32 0, i32 %v13
    103   store <16 x i32> %v12, <16 x i32>* %v14, align 64 ; g3[i] = lvsplatw(i * 10 + 1)
    104   %v15 = load i32, i32* %v1, align 4
    105   %v16 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g2, i32 0, i32 %v15
    106   %v17 = load <16 x i32>, <16 x i32>* %v16, align 64 ; reload g2[i], the slot stored above
    107   %v18 = load i32, i32* %v1, align 4
    108   %v19 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g3, i32 0, i32 %v18
    109   %v20 = load <16 x i32>, <16 x i32>* %v19, align 64 ; reload g3[i], the slot stored above
    110   %v21 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v17, <16 x i32> %v20) ; combine g2[i] and g3[i] into one <32 x i32>
    111   %v22 = load i32, i32* %v1, align 4
    112   %v23 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g4, i32 0, i32 %v22
    113   store <32 x i32> %v21, <32 x i32>* %v23, align 64 ; g4[i] = combined vector
    114   br label %b3
    115 
    116 b3:                                               ; preds = %b2
    117   %v24 = load i32, i32* %v1, align 4
    118   %v25 = add nsw i32 %v24, 1 ; i += 1
    119   store i32 %v25, i32* %v1, align 4
    120   br label %b1
    121 
    122 b4:                                               ; preds = %b1
    123   store i32 0, i32* %v1, align 4 ; reset i = 0 for the second loop
    124   br label %b5
    125 
    126 b5:                                               ; preds = %b7, %b4
    127   %v26 = load i32, i32* %v1, align 4
    128   %v27 = icmp slt i32 %v26, 3 ; second loop runs while i < 3
    129   br i1 %v27, label %b6, label %b8
    130 
    131 b6:                                               ; preds = %b5
    132   %v28 = load i32, i32* %v1, align 4
    133   %v29 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g4, i32 0, i32 %v28
    134   %v30 = load <32 x i32>, <32 x i32>* %v29, align 64 ; read g4[i]
    135   %v31 = load i32, i32* %v1, align 4
    136   %v32 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g0, i32 0, i32 %v31
    137   store <32 x i32> %v30, <32 x i32>* %v32, align 64 ; g0[i] = g4[i]
    138   br label %b7
    139 
    140 b7:                                               ; preds = %b6
    141   %v33 = load i32, i32* %v1, align 4
    142   %v34 = add nsw i32 %v33, 1 ; i += 1
    143   store i32 %v34, i32* %v1, align 4
    144   br label %b5
    145 
    146 b8:                                               ; preds = %b5
    147   store i32 0, i32* %v1, align 4 ; reset i = 0 for the third loop
    148   br label %b9
    149 
    150 b9:                                               ; preds = %b11, %b8
    151   %v35 = load i32, i32* %v1, align 4
    152   %v36 = icmp slt i32 %v35, 3 ; third loop runs while i < 3
    153   br i1 %v36, label %b10, label %b12
    154 
    155 b10:                                              ; preds = %b9
    156   %v37 = load i32, i32* %v1, align 4
    157   %v38 = mul nsw i32 %v37, 16 ; i * 16 is the i32 element offset handed to f1
    158   call void @f1(i32 %v38)
    159   br label %b11
    160 
    161 b11:                                              ; preds = %b10
    162   %v39 = load i32, i32* %v1, align 4
    163   %v40 = add nsw i32 %v39, 1 ; i += 1
    164   store i32 %v40, i32* %v1, align 4
    165   br label %b9
    166 
    167 b12:                                              ; preds = %b9
    168   ret i32 0
    169 }
    170 
    171 ; Function Attrs: nounwind readnone -- intrinsic called twice per iteration in f2's first loop (i32 in, <16 x i32> out)
    172 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
    173 
    174 ; Function Attrs: nounwind readnone -- intrinsic used in f2 to merge two <16 x i32> values into one <32 x i32>
    175 declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
    176 
    177 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } ; applied to f1 and f2
    178 attributes #1 = { nounwind readnone } ; applied to the two intrinsic declarations above
    179