; RUN: llc -march=hexagon < %s
; REQUIRES: asserts

; Dead defs may still appear live in LivePhysRegs, leading to an expansion
; of a double-vector store that uses an undefined source register.
;
; NOTE(review): this is a machine-reduced regression test. The pervasive
; `undef` operands, the `br i1 undef` branches, and the load from `null`
; are intentional — they are the minimal conditions that reproduced the
; original backend failure. Do not "clean up" the IR; the test only needs
; llc to compile it without hitting an assertion (hence REQUIRES: asserts
; and no FileCheck lines).

target triple = "hexagon-unknown--elf"

; External Halide runtime entry points; bodies are irrelevant to the test.
declare noalias i8* @halide_malloc() local_unnamed_addr #0
declare void @halide_free() local_unnamed_addr #0

; HVX 128-byte-mode intrinsics used below. <32 x i32> is one HVX vector,
; <64 x i32> is a double (pair) vector.
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) #1
declare <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32>, <32 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32>, <32 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.vasrwh.128B(<32 x i32>, <32 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.vavghrnd.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.vpackeh.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vshufoh.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vsubhsat.128B(<32 x i32>, <32 x i32>) #1
declare <64 x i32> @llvm.hexagon.V6.vaddhw.128B(<32 x i32>, <32 x i32>) #1
declare <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32>, <64 x i32>) #1
declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #1
declare <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32>, i32) #1
declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1
declare <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32>, <32 x i32>, i32) #1

; Reduced from Halide-generated code. The function's runtime behavior is
; meaningless (it branches on undef); it exists only to drive codegen.
define hidden void @fred() #0 {
b0:
  br i1 undef, label %b1, label %b2

b1:                                               ; preds = %b0
  ret void

b2:                                               ; preds = %b0
  ; Four buffers from the Halide runtime, viewed as i16*.
  %v3 = tail call i8* @halide_malloc()
  %v4 = bitcast i8* %v3 to i16*
  %v5 = tail call i8* @halide_malloc()
  %v6 = bitcast i8* %v5 to i16*
  %v7 = tail call i8* @halide_malloc()
  %v8 = bitcast i8* %v7 to i16*
  %v9 = tail call i8* @halide_malloc()
  %v10 = bitcast i8* %v9 to i16*
  br label %b11

b11:                                              ; preds = %b11, %b2
  br i1 undef, label %b12, label %b11

b12:                                              ; preds = %b11
  br i1 undef, label %b16, label %b13

b13:                                              ; preds = %b12
  %v14 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> zeroinitializer) #2
  %v15 = tail call <32 x i32> @llvm.hexagon.V6.vasrwh.128B(<32 x i32> undef, <32 x i32> %v14, i32 1) #2
  br i1 undef, label %b19, label %b17

b16:                                              ; preds = %b12
  unreachable

b17:                                              ; preds = %b13
  %v18 = tail call <32 x i32> @llvm.hexagon.V6.vavghrnd.128B(<32 x i32> %v15, <32 x i32> undef) #2
  br label %b19

b19:                                              ; preds = %b17, %b13
  ; Shuffle a pair whose high half is undef, then store only the low half;
  ; presumably this shape contributes to the dead-def situation — the hi
  ; result is never used.
  %v20 = phi <32 x i32> [ %v18, %b17 ], [ %v15, %b13 ]
  %v21 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> zeroinitializer, <32 x i32> %v20) #2
  %v22 = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> %v21, <32 x i32> undef, i32 -2)
  %v23 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v22)
  store <32 x i32> %v23, <32 x i32>* undef, align 128
  tail call void @halide_free() #3
  br label %b24

b24:                                              ; preds = %b33, %b19
  ; Outer-loop header: sets up loop-invariant splats and the multiplier
  ; %v30 (derived from an undef float) used inside the inner loop.
  %v25 = load <32 x i32>, <32 x i32>* undef, align 128
  %v26 = fptoui float undef to i16
  %v27 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 -2147450880) #2
  %v28 = xor i16 %v26, -1
  %v29 = zext i16 %v28 to i32
  %v30 = or i32 0, %v29
  %v31 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1) #2
  %v32 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v31, <32 x i32> %v31)
  br label %b34

b33:                                              ; preds = %b34
  br label %b24

b34:                                              ; preds = %b34, %b24
  ; Inner loop: a long straight-line chain of HVX ops with many undef
  ; (and one null) memory operands, ending in double-vector shuffles
  ; whose hi/lo halves are stored separately. Kept verbatim from the
  ; reduction; the exact instruction mix matters for the reproducer.
  %v35 = phi <32 x i32> [ %v45, %b34 ], [ undef, %b24 ]
  %v36 = phi <32 x i32> [ undef, %b34 ], [ %v25, %b24 ]
  %v37 = phi <32 x i32> [ %v46, %b34 ], [ undef, %b24 ]
  %v38 = phi i32 [ %v145, %b34 ], [ 0, %b24 ]
  %v39 = load <32 x i32>, <32 x i32>* undef, align 128
  %v40 = add nsw i32 %v38, undef
  %v41 = shl nsw i32 %v40, 6
  %v42 = add nsw i32 %v41, 64
  %v43 = getelementptr inbounds i16, i16* %v6, i32 %v42
  %v44 = bitcast i16* %v43 to <32 x i32>*
  %v45 = load <32 x i32>, <32 x i32>* %v44, align 128
  %v46 = load <32 x i32>, <32 x i32>* undef, align 128
  %v47 = load <32 x i32>, <32 x i32>* null, align 128
  %v48 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 2)
  %v49 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v45, <32 x i32> %v35, i32 24)
  %v50 = tail call <32 x i32> @llvm.hexagon.V6.vsubhsat.128B(<32 x i32> %v48, <32 x i32> %v49) #2
  %v51 = tail call <64 x i32> @llvm.hexagon.V6.vaddhw.128B(<32 x i32> undef, <32 x i32> %v50) #2
  %v52 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v39, <32 x i32> %v47, i32 50)
  %v53 = tail call <32 x i32> @llvm.hexagon.V6.vpackeh.128B(<32 x i32> %v52, <32 x i32> undef)
  %v54 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v53, <32 x i32> %v27) #2
  %v55 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> undef, <32 x i32> %v54, i32 undef) #2
  %v56 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v55, <64 x i32> zeroinitializer) #2
  %v57 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v56)
  %v58 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v57, i32 16) #2
  %v59 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v56)
  %v60 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v59, i32 16) #2
  %v61 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v60, <32 x i32> %v58)
  %v62 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v61, <64 x i32> %v55) #2
  %v63 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v62, <64 x i32> zeroinitializer) #2
  %v64 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v63) #2
  %v65 = tail call <32 x i32> @llvm.hexagon.V6.vshufoh.128B(<32 x i32> %v64, <32 x i32> undef) #2
  %v66 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v65, <32 x i32> %v27) #2
  %v67 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v66, <32 x i32> undef) #2
  %v68 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> zeroinitializer, <32 x i32> %v27) #2
  %v69 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32> %v68, i32 %v30) #2
  %v70 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v47, <32 x i32> undef, i32 52)
  %v71 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v39, <32 x i32> %v47, i32 52)
  %v72 = tail call <32 x i32> @llvm.hexagon.V6.vpackeh.128B(<32 x i32> %v71, <32 x i32> %v70)
  %v73 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v72, <32 x i32> %v27) #2
  %v74 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %v69, <32 x i32> %v73, i32 undef) #2
  %v75 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v74, <64 x i32> zeroinitializer) #2
  %v76 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v75)
  %v77 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v76, i32 16) #2
  %v78 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> undef, <32 x i32> %v77)
  %v79 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v78, <64 x i32> %v74) #2
  %v80 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v79, <64 x i32> zeroinitializer) #2
  %v81 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v80) #2
  %v82 = tail call <32 x i32> @llvm.hexagon.V6.vshufoh.128B(<32 x i32> %v81, <32 x i32> undef) #2
  %v83 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v82, <32 x i32> %v27) #2
  %v84 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v51, <64 x i32> %v32) #2
  %v85 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v84) #2
  %v86 = tail call <32 x i32> @llvm.hexagon.V6.vasrwh.128B(<32 x i32> undef, <32 x i32> %v85, i32 1) #2
  %v87 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v83, <32 x i32> %v86) #2
  ; First shuffle-pair store: both halves of %v88 are stored.
  %v88 = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> %v87, <32 x i32> %v67, i32 -2)
  %v89 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v88)
  %v90 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v88)
  %v91 = getelementptr inbounds i16, i16* %v10, i32 undef
  %v92 = bitcast i16* %v91 to <32 x i32>*
  store <32 x i32> %v90, <32 x i32>* %v92, align 128
  %v93 = getelementptr inbounds i16, i16* %v10, i32 undef
  %v94 = bitcast i16* %v93 to <32 x i32>*
  store <32 x i32> %v89, <32 x i32>* %v94, align 128
  %v95 = getelementptr inbounds i16, i16* %v4, i32 undef
  %v96 = bitcast i16* %v95 to <32 x i32>*
  %v97 = load <32 x i32>, <32 x i32>* %v96, align 128
  %v98 = getelementptr inbounds i16, i16* %v8, i32 undef
  %v99 = bitcast i16* %v98 to <32 x i32>*
  %v100 = load <32 x i32>, <32 x i32>* %v99, align 128
  %v101 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> undef, <32 x i32> %v36, i32 22)
  %v102 = tail call <32 x i32> @llvm.hexagon.V6.vsubhsat.128B(<32 x i32> %v100, <32 x i32> %v101) #2
  %v103 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> undef, <32 x i32> %v102) #2
  %v104 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v97, <32 x i32> %v37, i32 48)
  %v105 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v46, <32 x i32> %v97, i32 48)
  %v106 = tail call <32 x i32> @llvm.hexagon.V6.vpackeh.128B(<32 x i32> %v105, <32 x i32> %v104)
  %v107 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> undef, <64 x i32> %v32) #2
  %v108 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v107) #2
  %v109 = tail call <32 x i32> @llvm.hexagon.V6.vasrwh.128B(<32 x i32> undef, <32 x i32> %v108, i32 1) #2
  %v110 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v106, <32 x i32> %v109) #2
  ; Second shuffle-pair store.
  %v111 = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> %v110, <32 x i32> %v103, i32 -2)
  %v112 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v111)
  %v113 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v111)
  %v114 = getelementptr inbounds i16, i16* %v10, i32 undef
  %v115 = bitcast i16* %v114 to <32 x i32>*
  store <32 x i32> %v113, <32 x i32>* %v115, align 128
  %v116 = getelementptr inbounds i16, i16* %v10, i32 undef
  %v117 = bitcast i16* %v116 to <32 x i32>*
  store <32 x i32> %v112, <32 x i32>* %v117, align 128
  %v118 = getelementptr inbounds i16, i16* %v4, i32 undef
  %v119 = bitcast i16* %v118 to <32 x i32>*
  %v120 = load <32 x i32>, <32 x i32>* %v119, align 128
  %v121 = getelementptr inbounds i16, i16* %v6, i32 undef
  %v122 = bitcast i16* %v121 to <32 x i32>*
  %v123 = load <32 x i32>, <32 x i32>* %v122, align 128
  %v124 = getelementptr inbounds i16, i16* %v6, i32 0
  %v125 = bitcast i16* %v124 to <32 x i32>*
  %v126 = load <32 x i32>, <32 x i32>* %v125, align 128
  %v127 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v126, <32 x i32> %v123, i32 22)
  %v128 = tail call <32 x i32> @llvm.hexagon.V6.vsubhsat.128B(<32 x i32> undef, <32 x i32> %v127) #2
  %v129 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v126, <32 x i32> %v123, i32 24)
  %v130 = tail call <32 x i32> @llvm.hexagon.V6.vsubhsat.128B(<32 x i32> undef, <32 x i32> %v129) #2
  %v131 = tail call <64 x i32> @llvm.hexagon.V6.vaddhw.128B(<32 x i32> %v128, <32 x i32> %v130) #2
  %v132 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v120, <32 x i32> undef, i32 46)
  %v133 = tail call <32 x i32> @llvm.hexagon.V6.vpackeh.128B(<32 x i32> undef, <32 x i32> %v132)
  %v134 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v133, <32 x i32> %v128) #2
  %v135 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v120, <32 x i32> undef, i32 48)
  %v136 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> undef, <32 x i32> %v120, i32 48)
  %v137 = tail call <32 x i32> @llvm.hexagon.V6.vpackeh.128B(<32 x i32> %v136, <32 x i32> %v135)
  %v138 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v131, <64 x i32> %v32) #2
  %v139 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v138) #2
  %v140 = tail call <32 x i32> @llvm.hexagon.V6.vasrwh.128B(<32 x i32> %v139, <32 x i32> undef, i32 1) #2
  %v141 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v137, <32 x i32> %v140) #2
  ; Third shuffle-pair store, to undef destinations.
  %v142 = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> %v141, <32 x i32> %v134, i32 -2)
  %v143 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v142)
  %v144 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v142)
  store <32 x i32> %v144, <32 x i32>* undef, align 128
  store <32 x i32> %v143, <32 x i32>* undef, align 128
  %v145 = add nuw nsw i32 %v38, 1
  %v146 = icmp eq i32 %v38, undef
  br i1 %v146, label %b33, label %b34
}

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
attributes #3 = { nobuiltin nounwind }