; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
; PR11765
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

; This test case exercises the MachineCopyPropagation pass by disabling the
; RegisterCoalescer.
;
; The llc invocation above is not piped into FileCheck, so no output is
; matched; presumably the test only requires llc to finish without crashing
; and without a machine-verifier error (-verify-machineinstrs).
; NOTE(review): -join-liveintervals=0 is presumably the switch that disables
; the coalescer mentioned above -- confirm against the llc version in use.
;
; The body of @foo looks like a reduced reproducer: many operands are undef,
; null or zeroinitializer and a few results have no uses. Instruction order
; and value names are kept exactly as they were; do not tidy them up.

define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 {
bb:
  br i1 undef, label %bb1, label %bb2

bb1:                                              ; preds = %bb
  unreachable

bb2:                                              ; preds = %bb
  br i1 undef, label %bb92, label %bb3

bb3:                                              ; preds = %bb2
  ; 128-bit vector values are split into 64-bit halves via <1 x i64>
  ; shufflevectors, masked/merged as <2 x i32>, and widened back to
  ; <4 x float>.
  %tmp = or <4 x i32> undef, undef
  %tmp4 = bitcast <4 x i32> %tmp to <4 x float>
  %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4
  %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float>
  %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
  %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64>
  %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
  %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1>
  %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float>
  %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2>
  %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2>
  %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64>
  %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64>
  %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32>
  %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0>
  %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32>
  %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1>
  %tmp22 = or <2 x i32> %tmp19, %tmp21
  %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
  %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
  %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float>
  %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1>
  %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32>
  %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0>
  %tmp29 = and <2 x i32> undef, <i32 0, i32 -1>
  %tmp30 = or <2 x i32> %tmp28, %tmp29
  %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64>
  %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3
  %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
  %tmp34 = fadd <4 x float> %tmp33, %tmp32
  %tmp35 = fmul <4 x float> %tmp33, zeroinitializer
  %tmp36 = fadd <4 x float> %tmp35, zeroinitializer
  %tmp37 = fadd <4 x float> %tmp35, zeroinitializer
  ; Three multiply/add chains follow. Each broadcasts one float lane taken
  ; from a 64-bit half and produces %tmp46, %tmp54 and %tmp61 respectively.
  %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64>
  %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
  %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %tmp42 = load <4 x float>* null, align 16
  %tmp43 = fmul <4 x float> %tmp42, %tmp41
  %tmp44 = load <4 x float>* undef, align 16
  %tmp45 = fadd <4 x float> undef, %tmp43
  %tmp46 = fadd <4 x float> undef, %tmp45
  %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
  %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float>
  %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %tmp51 = fmul <4 x float> %tmp42, %tmp50
  %tmp52 = fmul <4 x float> %tmp44, undef
  %tmp53 = fadd <4 x float> %tmp52, %tmp51
  %tmp54 = fadd <4 x float> undef, %tmp53
  %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64>
  %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1>
  %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float>
  %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer
  %tmp59 = fmul <4 x float> undef, %tmp58
  %tmp60 = fadd <4 x float> %tmp59, undef
  %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
  ; Indirect call through a loaded function pointer. %tmp46, %tmp54 and
  ; %tmp61 are defined above and first used below, so they are live across
  ; this call.
  %tmp62 = load void (i8*, i8*)** undef, align 4
  call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind
  ; Pack 64-bit pieces of the three vectors into an [8 x i64] aggregate:
  ; element 1 = upper half of %tmp46, element 3 = upper half of %tmp54,
  ; element 4 = lower half of %tmp61; elements 2, 5, 6, 7 are undef and
  ; element 0 is never written.
  %tmp63 = bitcast <4 x float> %tmp46 to i128
  %tmp64 = bitcast <4 x float> %tmp54 to i128
  %tmp65 = bitcast <4 x float> %tmp61 to i128
  %tmp66 = lshr i128 %tmp63, 64
  %tmp67 = trunc i128 %tmp66 to i64
  %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1
  %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2
  %tmp70 = lshr i128 %tmp64, 64
  %tmp71 = trunc i128 %tmp70 to i64
  %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3
  %tmp73 = trunc i128 %tmp65 to i64
  %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4
  %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5
  %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6
  %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7
  ; The aggregate is passed by value to @bar with a null sret pointer.
  call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
  ; Call through a null callee, reinterpret the returned i8* as i512*, load
  ; it, and take bits 128..255 of the loaded value as a <4 x float>.
  %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind
  %tmp79 = bitcast i8* %tmp78 to i512*
  %tmp80 = load i512* %tmp79, align 16
  %tmp81 = lshr i512 %tmp80, 128
  %tmp82 = trunc i512 %tmp80 to i128
  %tmp83 = trunc i512 %tmp81 to i128
  %tmp84 = bitcast i128 %tmp83 to <4 x float>
  %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64>
  %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1>
  %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float>
  %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %tmp89 = fmul <4 x float> undef, %tmp88
  %tmp90 = fadd <4 x float> %tmp89, undef
  %tmp91 = fadd <4 x float> undef, %tmp90
  store <4 x float> %tmp91, <4 x float>* undef, align 16
  ; This block never returns; only bb92 reaches a ret.
  unreachable

bb92:                                             ; preds = %bb2
  ret void
}

; External declaration of @bar, which has no body in this file. @foo calls it
; with a null sret pointer and an [8 x i64] aggregate passed by value.
declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint