target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s

; Regression test for basic-block vectorization of intrinsic calls.
;
; Every function below builds two parallel scalar chains of depth 3
; (sub -> intrinsic call -> add) whose results are multiplied together.
; The positive tests expect the two chains to be fused into <2 x ...> vector
; operations, including a call to the vector form of the intrinsic.  The
; negative tests (@test4, @testctlzneg, @testcttzneg) pass different scalar
; operands to the two intrinsic calls and expect the code to stay scalar.
;
; Each function's expectations are anchored with a label directive of the form
; "@name(" so that a mismatch is reported against the right function and so
; that names sharing a prefix (@testctlz / @testctlzneg) cannot be confused.

declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
declare double @llvm.round.f64(double)
declare double @llvm.copysign.f64(double, double)
declare double @llvm.ceil.f64(double)
declare double @llvm.nearbyint.f64(double)
declare double @llvm.rint.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.floor.f64(double)
declare double @llvm.fabs.f64(double)
declare i64 @llvm.bswap.i64(i64)
declare i64 @llvm.ctpop.i64(i64)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i64 @llvm.cttz.i64(i64, i1)

; Basic depth-3 chain with fma
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
  %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test1(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with fmuladd
define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
  %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test1a(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with cos
define double @test2(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.cos.f64(double %X1)
  %Y2 = call double @llvm.cos.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test2(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi (same power operand %P in both calls)
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {

  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test3(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi (different powers: should not vectorize)
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {

  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %P2 = add i32 %P, 1
  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test4(
; CHECK-NOT: <2 x double>
; CHECK: ret double %R
}

; Basic depth-3 chain with round
define double @testround(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.round.f64(double %X1)
  %Y2 = call double @llvm.round.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testround(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with copysign
; Both calls take %A1 as the sign operand, so the expected sign vector is
; built by inserting %A1 into lane 1 of the already-available %X1.v.i0.1
; (which holds %A1 in lane 0).
define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.copysign.f64(double %X1, double %A1)
  %Y2 = call double @llvm.copysign.f64(double %X2, double %A1)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testcopysign(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i1.2 = insertelement <2 x double> %X1.v.i0.1, double %A1, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with ceil
define double @testceil(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.ceil.f64(double %X1)
  %Y2 = call double @llvm.ceil.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testceil(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with nearbyint
define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.nearbyint.f64(double %X1)
  %Y2 = call double @llvm.nearbyint.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testnearbyint(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with rint
define double @testrint(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.rint.f64(double %X1)
  %Y2 = call double @llvm.rint.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testrint(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with trunc
define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.trunc.f64(double %X1)
  %Y2 = call double @llvm.trunc.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testtrunc(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with floor
define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.floor.f64(double %X1)
  %Y2 = call double @llvm.floor.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testfloor(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with fabs
define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.fabs.f64(double %X1)
  %Y2 = call double @llvm.fabs.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testfabs(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with bswap
define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.bswap.i64(i64 %X1)
  %Y2 = call i64 @llvm.bswap.i64(i64 %X2)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testbswap(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Basic depth-3 chain with ctpop
define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.ctpop.i64(i64 %X1)
  %Y2 = call i64 @llvm.ctpop.i64(i64 %X2)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testctpop(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Basic depth-3 chain with ctlz (same i1 operand in both calls)
define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testctlz(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Basic depth-3 chain with ctlz (different i1 operands: should not vectorize)
define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testctlzneg(
; CHECK: %X1 = sub i64 %A1, %B1
; CHECK: %X2 = sub i64 %A2, %B2
; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
; CHECK: %Z1 = add i64 %Y1, %B1
; CHECK: %Z2 = add i64 %Y2, %B2
; CHECK: %R = mul i64 %Z1, %Z2
; CHECK: ret i64 %R
}

; Basic depth-3 chain with cttz (same i1 operand in both calls)
define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testcttz(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Basic depth-3 chain with cttz (different i1 operands: should not vectorize)
define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testcttzneg(
; CHECK: %X1 = sub i64 %A1, %B1
; CHECK: %X2 = sub i64 %A2, %B2
; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
; CHECK: %Z1 = add i64 %Y1, %B1
; CHECK: %Z2 = add i64 %Y2, %B2
; CHECK: %R = mul i64 %Z1, %Z2
; CHECK: ret i64 %R
}



; The vector forms of the intrinsics used above must be declared in the
; output, all sharing attribute group #0.
; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
; CHECK: attributes #0 = { nounwind readnone }