; RUN: llc < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Function Attrs: nounwind
define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
entry:
  br label %vector.body

; CHECK-LABEL: @foo
; Make sure that the offset constants we use are all even (only the last should be odd).
; CHECK-DAG: li {{[0-9]+}}, 1056
; CHECK-DAG: li {{[0-9]+}}, 1088
; CHECK-DAG: li {{[0-9]+}}, 1152
; CHECK-DAG: li {{[0-9]+}}, 1216
; CHECK-DAG: li {{[0-9]+}}, 1280
; CHECK-DAG: li {{[0-9]+}}, 1344
; CHECK-DAG: li {{[0-9]+}}, 1408
; CHECK-DAG: li {{[0-9]+}}, 1472
; CHECK-DAG: li {{[0-9]+}}, 1536
; CHECK-DAG: li {{[0-9]+}}, 1600
; CHECK-DAG: li {{[0-9]+}}, 1568
; CHECK-DAG: li {{[0-9]+}}, 1664
; CHECK-DAG: li {{[0-9]+}}, 1632
; CHECK-DAG: li {{[0-9]+}}, 1728
; CHECK-DAG: li {{[0-9]+}}, 1696
; CHECK-DAG: li {{[0-9]+}}, 1792
; CHECK-DAG: li {{[0-9]+}}, 1760
; CHECK-DAG: li {{[0-9]+}}, 1856
; CHECK-DAG: li {{[0-9]+}}, 1824
; CHECK-DAG: li {{[0-9]+}}, 1920
; CHECK-DAG: li {{[0-9]+}}, 1888
; CHECK-DAG: li {{[0-9]+}}, 1984
; CHECK-DAG: li {{[0-9]+}}, 1952
; CHECK-DAG: li {{[0-9]+}}, 2016
; CHECK-DAG: li {{[0-9]+}}, 1024
; CHECK-DAG: li {{[0-9]+}}, 1120
; CHECK-DAG: li {{[0-9]+}}, 1184
; CHECK-DAG: li {{[0-9]+}}, 1248
; CHECK-DAG: li {{[0-9]+}}, 1312
; CHECK-DAG: li {{[0-9]+}}, 1376
; CHECK-DAG: li {{[0-9]+}}, 1440
; CHECK-DAG: li {{[0-9]+}}, 1504
; CHECK-DAG: li {{[0-9]+}}, 2047
; CHECK: blr

; The vector.body loop below roughly corresponds to
;   for (long i = 0; i < 1600; ++i)
;     a[i] = b[2*i] + 1.0;
; vectorized by 4 and interleaved (unrolled) 16 times, so each trip handles
; 64 elements of %a via 16 strided <8 x double> loads and 16 <4 x double> stores.
vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
  %0 = shl i64 %index, 1
  %1 = getelementptr inbounds double, double* %b, i64 %0
  %2 = bitcast double* %1 to <8 x double>*
  %wide.vec = load <8 x double>, <8 x double>* %2, align 8
  %strided.vec = shufflevector <8 x double> %wide.vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %3 = fadd <4 x double> %strided.vec, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %4 = getelementptr inbounds double, double* %a, i64 %index
  %5 = bitcast double* %4 to <4 x double>*
  store <4 x double> %3, <4 x double>* %5, align 8
  %index.next = or i64 %index, 4
  %6 = shl i64 %index.next, 1
  %7 = getelementptr inbounds double, double* %b, i64 %6
  %8 = bitcast double* %7 to <8 x double>*
  %wide.vec.1 = load <8 x double>, <8 x double>* %8, align 8
  %strided.vec.1 = shufflevector <8 x double> %wide.vec.1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %9 = fadd <4 x double> %strided.vec.1, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %10 = getelementptr inbounds double, double* %a, i64 %index.next
  %11 = bitcast double* %10 to <4 x double>*
  store <4 x double> %9, <4 x double>* %11, align 8
  %index.next.1 = or i64 %index, 8
  %12 = shl i64 %index.next.1, 1
  %13 = getelementptr inbounds double, double* %b, i64 %12
  %14 = bitcast double* %13 to <8 x double>*
  %wide.vec.2 = load <8 x double>, <8 x double>* %14, align 8
  %strided.vec.2 = shufflevector <8 x double> %wide.vec.2, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %15 = fadd <4 x double> %strided.vec.2, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %16 = getelementptr inbounds double, double* %a, i64 %index.next.1
  %17 = bitcast double* %16 to <4 x double>*
  store <4 x double> %15, <4 x double>* %17, align 8
  %index.next.2 = or i64 %index, 12
  %18 = shl i64 %index.next.2, 1
  %19 = getelementptr inbounds double, double* %b, i64 %18
  %20 = bitcast double* %19 to <8 x double>*
  %wide.vec.3 = load <8 x double>, <8 x double>* %20, align 8
  %strided.vec.3 = shufflevector <8 x double> %wide.vec.3, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %21 = fadd <4 x double> %strided.vec.3, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %22 = getelementptr inbounds double, double* %a, i64 %index.next.2
  %23 = bitcast double* %22 to <4 x double>*
  store <4 x double> %21, <4 x double>* %23, align 8
  %index.next.3 = or i64 %index, 16
  %24 = shl i64 %index.next.3, 1
  %25 = getelementptr inbounds double, double* %b, i64 %24
  %26 = bitcast double* %25 to <8 x double>*
  %wide.vec.4 = load <8 x double>, <8 x double>* %26, align 8
  %strided.vec.4 = shufflevector <8 x double> %wide.vec.4, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %27 = fadd <4 x double> %strided.vec.4, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %28 = getelementptr inbounds double, double* %a, i64 %index.next.3
  %29 = bitcast double* %28 to <4 x double>*
  store <4 x double> %27, <4 x double>* %29, align 8
  %index.next.4 = or i64 %index, 20
  %30 = shl i64 %index.next.4, 1
  %31 = getelementptr inbounds double, double* %b, i64 %30
  %32 = bitcast double* %31 to <8 x double>*
  %wide.vec.5 = load <8 x double>, <8 x double>* %32, align 8
  %strided.vec.5 = shufflevector <8 x double> %wide.vec.5, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %33 = fadd <4 x double> %strided.vec.5, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %34 = getelementptr inbounds double, double* %a, i64 %index.next.4
  %35 = bitcast double* %34 to <4 x double>*
  store <4 x double> %33, <4 x double>* %35, align 8
  %index.next.5 = or i64 %index, 24
  %36 = shl i64 %index.next.5, 1
  %37 = getelementptr inbounds double, double* %b, i64 %36
  %38 = bitcast double* %37 to <8 x double>*
  %wide.vec.6 = load <8 x double>, <8 x double>* %38, align 8
  %strided.vec.6 = shufflevector <8 x double> %wide.vec.6, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %39 = fadd <4 x double> %strided.vec.6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %40 = getelementptr inbounds double, double* %a, i64 %index.next.5
  %41 = bitcast double* %40 to <4 x double>*
  store <4 x double> %39, <4 x double>* %41, align 8
  %index.next.6 = or i64 %index, 28
  %42 = shl i64 %index.next.6, 1
  %43 = getelementptr inbounds double, double* %b, i64 %42
  %44 = bitcast double* %43 to <8 x double>*
  %wide.vec.7 = load <8 x double>, <8 x double>* %44, align 8
  %strided.vec.7 = shufflevector <8 x double> %wide.vec.7, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %45 = fadd <4 x double> %strided.vec.7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %46 = getelementptr inbounds double, double* %a, i64 %index.next.6
  %47 = bitcast double* %46 to <4 x double>*
  store <4 x double> %45, <4 x double>* %47, align 8
  %index.next.7 = or i64 %index, 32
  %48 = shl i64 %index.next.7, 1
  %49 = getelementptr inbounds double, double* %b, i64 %48
  %50 = bitcast double* %49 to <8 x double>*
  %wide.vec.8 = load <8 x double>, <8 x double>* %50, align 8
  %strided.vec.8 = shufflevector <8 x double> %wide.vec.8, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %51 = fadd <4 x double> %strided.vec.8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %52 = getelementptr inbounds double, double* %a, i64 %index.next.7
  %53 = bitcast double* %52 to <4 x double>*
  store <4 x double> %51, <4 x double>* %53, align 8
  %index.next.8 = or i64 %index, 36
  %54 = shl i64 %index.next.8, 1
  %55 = getelementptr inbounds double, double* %b, i64 %54
  %56 = bitcast double* %55 to <8 x double>*
  %wide.vec.9 = load <8 x double>, <8 x double>* %56, align 8
  %strided.vec.9 = shufflevector <8 x double> %wide.vec.9, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %57 = fadd <4 x double> %strided.vec.9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %58 = getelementptr inbounds double, double* %a, i64 %index.next.8
  %59 = bitcast double* %58 to <4 x double>*
  store <4 x double> %57, <4 x double>* %59, align 8
  %index.next.9 = or i64 %index, 40
  %60 = shl i64 %index.next.9, 1
  %61 = getelementptr inbounds double, double* %b, i64 %60
  %62 = bitcast double* %61 to <8 x double>*
  %wide.vec.10 = load <8 x double>, <8 x double>* %62, align 8
  %strided.vec.10 = shufflevector <8 x double> %wide.vec.10, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %63 = fadd <4 x double> %strided.vec.10, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %64 = getelementptr inbounds double, double* %a, i64 %index.next.9
  %65 = bitcast double* %64 to <4 x double>*
  store <4 x double> %63, <4 x double>* %65, align 8
  %index.next.10 = or i64 %index, 44
  %66 = shl i64 %index.next.10, 1
  %67 = getelementptr inbounds double, double* %b, i64 %66
  %68 = bitcast double* %67 to <8 x double>*
  %wide.vec.11 = load <8 x double>, <8 x double>* %68, align 8
  %strided.vec.11 = shufflevector <8 x double> %wide.vec.11, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %69 = fadd <4 x double> %strided.vec.11, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %70 = getelementptr inbounds double, double* %a, i64 %index.next.10
  %71 = bitcast double* %70 to <4 x double>*
  store <4 x double> %69, <4 x double>* %71, align 8
  %index.next.11 = or i64 %index, 48
  %72 = shl i64 %index.next.11, 1
  %73 = getelementptr inbounds double, double* %b, i64 %72
  %74 = bitcast double* %73 to <8 x double>*
  %wide.vec.12 = load <8 x double>, <8 x double>* %74, align 8
  %strided.vec.12 = shufflevector <8 x double> %wide.vec.12, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %75 = fadd <4 x double> %strided.vec.12, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %76 = getelementptr inbounds double, double* %a, i64 %index.next.11
  %77 = bitcast double* %76 to <4 x double>*
  store <4 x double> %75, <4 x double>* %77, align 8
  %index.next.12 = or i64 %index, 52
  %78 = shl i64 %index.next.12, 1
  %79 = getelementptr inbounds double, double* %b, i64 %78
  %80 = bitcast double* %79 to <8 x double>*
  %wide.vec.13 = load <8 x double>, <8 x double>* %80, align 8
  %strided.vec.13 = shufflevector <8 x double> %wide.vec.13, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %81 = fadd <4 x double> %strided.vec.13, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %82 = getelementptr inbounds double, double* %a, i64 %index.next.12
  %83 = bitcast double* %82 to <4 x double>*
  store <4 x double> %81, <4 x double>* %83, align 8
  %index.next.13 = or i64 %index, 56
  %84 = shl i64 %index.next.13, 1
  %85 = getelementptr inbounds double, double* %b, i64 %84
  %86 = bitcast double* %85 to <8 x double>*
  %wide.vec.14 = load <8 x double>, <8 x double>* %86, align 8
  %strided.vec.14 = shufflevector <8 x double> %wide.vec.14, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %87 = fadd <4 x double> %strided.vec.14, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %88 = getelementptr inbounds double, double* %a, i64 %index.next.13
  %89 = bitcast double* %88 to <4 x double>*
  store <4 x double> %87, <4 x double>* %89, align 8
  %index.next.14 = or i64 %index, 60
  %90 = shl i64 %index.next.14, 1
  %91 = getelementptr inbounds double, double* %b, i64 %90
  %92 = bitcast double* %91 to <8 x double>*
  %wide.vec.15 = load <8 x double>, <8 x double>* %92, align 8
  %strided.vec.15 = shufflevector <8 x double> %wide.vec.15, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %93 = fadd <4 x double> %strided.vec.15, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %94 = getelementptr inbounds double, double* %a, i64 %index.next.14
  %95 = bitcast double* %94 to <4 x double>*
  store <4 x double> %93, <4 x double>* %95, align 8
  %index.next.15 = add nsw i64 %index, 64
  %96 = icmp eq i64 %index.next.15, 1600
  br i1 %96, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

attributes #0 = { nounwind "target-cpu"="a2q" }