; RUN: llc -march=hexagon -O3 < %s | FileCheck %s
; REQUIRES: asserts

; Check that the code compiles successfully.
; The only output that is verified is the call to f1 (emitted from block b6 of
; @f2); everything else in this file merely has to get through llc at -O3.
; NOTE(review): the asserts requirement suggests this input once tripped a
; compiler assertion - the file does not say which one; confirm from history.
; CHECK: call f1

target triple = "hexagon-unknown--elf"

; Aggregate passed to @f2 by pointer. @f2 only reads field 1 (the i8* member).
%s.0 = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] }

; External helper returning a noalias i8*; @f2 compares its result against null
; and uses it as the destination of the vector stores.
; Function Attrs: nounwind
declare noalias i8* @f0() local_unnamed_addr #0

; External helper called on the exit path (block b6) right before unreachable.
; Function Attrs: nounwind
declare void @f1() local_unnamed_addr #0

; Hexagon V6 vector intrinsics used by @f2, all in their ".128B" form operating
; on <32 x i32> single vectors and <64 x i32> vector pairs.

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) #1

; Function Attrs: nounwind readnone
declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32>, <64 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32>, <32 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32>, <32 x i32>, i32) #1

; @f2 is the function under test. It never returns: control either spins
; forever in b4 or reaches b6, which calls @f1 and ends in unreachable.
; Shape of the control flow:
;   b0  -> b1 -> b4 (self-loop)            path favoured by !prof !1
;   b0  -> b2 -> b6                        when the first @f0 result is null
;   b2  -> b3 -> b7 <-> b8                 outer loop, one @f0 call per trip
;   b7  -> b9 -> [b10 -> b11 (loop) -> b12] -> b13 -> b8
; Many operands are undef.
; NOTE(review): presumably left over from automated test-case reduction; keep
; them as they are, since the exact IR shape may be what triggers the problem.
; Function Attrs: noreturn nounwind
define void @f2(%s.0* noalias nocapture readonly %a01, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) local_unnamed_addr #2 {
b0:
  ; Load the pointer stored in field 1 of %a01, reinterpreted as i16*.
  ; It is the base address for every vector load below.
  %v0 = getelementptr inbounds %s.0, %s.0* %a01, i32 0, i32 1
  %v1 = bitcast i8** %v0 to i16**
  %v2 = load i16*, i16** %v1, align 4
  ; The result of this first @f0 call is not used by any later instruction.
  %v3 = tail call i8* @f0()
  ; %v5 is 0 when %a1 > 0, otherwise %a1; bit 0 is then forced on and the
  ; result is tested for being positive.
  %v4 = icmp sgt i32 %a1, 0
  %v5 = select i1 %v4, i32 0, i32 %a1
  %v6 = or i32 %v5, 1
  %v7 = icmp sgt i32 %v6, 0
  br i1 %v7, label %b1, label %b2, !prof !1

b1:                                               ; preds = %b0
  br label %b4

b2:                                               ; preds = %b0
  ; Values that stay constant across both loops.
  ; %v10 = (%a6 >> 6) * 64 + 255; %v11 and %v15 are the two loop guards.
  %v8 = ashr i32 %a6, 6
  %v9 = mul i32 %v8, 64
  %v10 = add nsw i32 %v9, 255
  %v11 = icmp sgt i32 %a6, -193
  %v12 = ashr i32 %a5, 6
  %v13 = ashr i32 %a4, 6
  %v14 = ashr i32 %a2, 6
  %v15 = icmp ult i32 %v10, 128
  ; Output pointer for the first outer-loop trip; a null result leaves via b6.
  %v16 = tail call i8* @f0()
  %v17 = icmp eq i8* %v16, null
  br i1 %v17, label %b6, label %b3, !prof !2

b3:                                               ; preds = %b2
  ; Four multiples of %v13 (x16, x19, x17, x18), added to %v26 in b9.
  %v18 = mul nsw i32 %v13, 16
  %v19 = mul nsw i32 %v13, 19
  %v20 = mul nsw i32 %v13, 17
  %v21 = mul nsw i32 %v13, 18
  br label %b7

b4:                                               ; preds = %b4, %b1
  ; Infinite self-loop with an empty body.
  br label %b4

b5:                                               ; preds = %b8
  br label %b6

b6:                                               ; preds = %b5, %b2
  ; Exit path; this is the call matched by the FileCheck pattern at the top.
  tail call void @f1() #3
  unreachable

b7:                                               ; preds = %b8, %b3
  ; Outer loop header.
  ; %v22 is the pointer obtained from the most recent @f0 call.
  ; %v23 is the trip counter, starting at 1.
  %v22 = phi i8* [ %v16, %b3 ], [ %v28, %b8 ]
  %v23 = phi i32 [ 1, %b3 ], [ %v27, %b8 ]
  ; %v26 = (%v23 - %a3) * %v12 - %v14
  %v24 = sub i32 %v23, %a3
  %v25 = mul i32 %v24, %v12
  %v26 = sub i32 %v25, %v14
  ; The vector work in b9..b13 is skipped entirely unless %a6 > -193.
  br i1 %v11, label %b9, label %b8

b8:                                               ; preds = %b13, %b7
  ; Outer loop latch: bump the counter and fetch the next pointer from @f0.
  ; A null pointer ends the loop and leads to the @f1 call.
  %v27 = add nuw nsw i32 %v23, 1
  %v28 = tail call i8* @f0()
  %v29 = icmp eq i8* %v28, null
  br i1 %v29, label %b5, label %b7, !prof !2

b9:                                               ; preds = %b7
  ; Index bases for this outer trip: %v26 plus each multiple from b3.
  %v30 = add i32 %v26, %v18
  %v31 = add i32 %v26, %v19
  %v32 = add i32 %v26, %v20
  %v33 = add i32 %v26, %v21
  ; Splat vectors: %v34 from an undef scalar, %v35 from the constant 8;
  ; %v36 pairs %v35 with itself and is added in b10, b11 and b13.
  %v34 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 undef) #3
  %v35 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 8) #3
  %v36 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v35, <32 x i32> %v35)
  ; Destination base for the vector stores, viewed as i16*.
  %v37 = bitcast i8* %v22 to i16*
  ; When %v15 holds, the inner loop is bypassed and only the tail b13 runs.
  br i1 %v15, label %b13, label %b10

b10:                                              ; preds = %b9
  ; Inner-loop preheader. %v45 depends on no inner-loop value and is the
  ; vector stored at the odd index on every pass through b11.
  %v38 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> undef) #3
  %v39 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> undef, <64 x i32> %v38) #3
  %v40 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v39, <64 x i32> %v36) #3
  %v41 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v40)
  %v42 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v41, i32 4) #3
  %v43 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> undef, <32 x i32> %v42)
  %v44 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v43) #3
  %v45 = tail call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> undef, <32 x i32> %v44) #3
  br label %b11

b11:                                              ; preds = %b11, %b10
  ; Inner loop. Each pass handles index %v49 and index %v49|1 and then
  ; advances %v49 by 2.
  ; %v46/%v47/%v48 carry the three vectors loaded at the odd index on the
  ; previous pass (undef on entry). %v50 is a down-counter whose initial value
  ; is undef; it is decremented by 2 and the loop exits when it reaches 0.
  %v46 = phi <32 x i32> [ %v120, %b11 ], [ undef, %b10 ]
  %v47 = phi <32 x i32> [ %v115, %b11 ], [ undef, %b10 ]
  %v48 = phi <32 x i32> [ %v110, %b11 ], [ undef, %b10 ]
  %v49 = phi i32 [ %v124, %b11 ], [ 0, %b10 ]
  %v50 = phi i32 [ %v125, %b11 ], [ undef, %b10 ]
  ; Four 128-byte-aligned vector loads from %v2 at element offsets
  ; (%v49 + base) << 6 for the bases %v33, %v32, %v31 and %v30.
  %v51 = add i32 %v49, %v33
  %v52 = shl nsw i32 %v51, 6
  %v53 = getelementptr inbounds i16, i16* %v2, i32 %v52
  %v54 = bitcast i16* %v53 to <32 x i32>*
  %v55 = load <32 x i32>, <32 x i32>* %v54, align 128, !tbaa !3
  %v56 = add i32 %v49, %v32
  %v57 = shl nsw i32 %v56, 6
  %v58 = getelementptr inbounds i16, i16* %v2, i32 %v57
  %v59 = bitcast i16* %v58 to <32 x i32>*
  %v60 = load <32 x i32>, <32 x i32>* %v59, align 128, !tbaa !3
  %v61 = add i32 %v31, %v49
  %v62 = shl nsw i32 %v61, 6
  %v63 = getelementptr inbounds i16, i16* %v2, i32 %v62
  %v64 = bitcast i16* %v63 to <32 x i32>*
  %v65 = load <32 x i32>, <32 x i32>* %v64, align 128, !tbaa !3
  %v66 = add i32 %v49, %v30
  %v67 = shl nsw i32 %v66, 6
  %v68 = getelementptr inbounds i16, i16* %v2, i32 %v67
  %v69 = bitcast i16* %v68 to <32 x i32>*
  %v70 = load <32 x i32>, <32 x i32>* %v69, align 128, !tbaa !3
  ; Every loaded vector goes through the same chain: valignb by 92, vasrh by 1,
  ; vaddh with %v34. The %v55 chain is additionally multiplied (vmpyuh).
  %v71 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v55, <32 x i32> undef, i32 92)
  %v72 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v71, i32 1) #3
  %v73 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v72, <32 x i32> %v34) #3
  %v74 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32> %v73, i32 393222) #3
  %v75 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v60, <32 x i32> %v48, i32 92)
  %v76 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v75, i32 1) #3
  %v77 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v76, <32 x i32> %v34) #3
  %v78 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v65, <32 x i32> undef, i32 92)
  %v79 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v78, i32 1) #3
  %v80 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v79, <32 x i32> %v34) #3
  %v81 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v77, <32 x i32> %v80) #3
  %v82 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> %v81) #3
  %v83 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v74)
  %v84 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v82)
  %v85 = tail call <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32> %v83, <32 x i32> %v84, i32 2) #3
  %v86 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v85, <32 x i32> undef)
  %v87 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v70, <32 x i32> %v47, i32 92)
  %v88 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v87, i32 1) #3
  %v89 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v88, <32 x i32> %v34) #3
  %v90 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> undef, <32 x i32> %v46, i32 92)
  %v91 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v90, i32 1) #3
  %v92 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v91, <32 x i32> %v34) #3
  %v93 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v89, <32 x i32> %v92) #3
  %v94 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> %v93) #3
  ; Accumulate in vector pairs, add %v36, take the high half, shift right by 4
  ; and shuffle down to the vector that gets stored.
  %v95 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v86, <64 x i32> %v94) #3
  %v96 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v95, <64 x i32> %v36) #3
  %v97 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v96)
  %v98 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v97, i32 4) #3
  %v99 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v98, <32 x i32> undef)
  %v100 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v99) #3
  %v101 = tail call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> undef, <32 x i32> %v100) #3
  ; Store the result for the even index at element offset %v49 << 6 of %v37.
  %v102 = shl nsw i32 %v49, 6
  %v103 = getelementptr inbounds i16, i16* %v37, i32 %v102
  %v104 = bitcast i16* %v103 to <32 x i32>*
  store <32 x i32> %v101, <32 x i32>* %v104, align 128, !tbaa !6
  ; Odd index (%v49 | 1): load the three vectors that feed the phis on the
  ; next pass (%v110 -> %v48, %v115 -> %v47, %v120 -> %v46).
  %v105 = or i32 %v49, 1
  %v106 = add i32 %v105, %v32
  %v107 = shl nsw i32 %v106, 6
  %v108 = getelementptr inbounds i16, i16* %v2, i32 %v107
  %v109 = bitcast i16* %v108 to <32 x i32>*
  %v110 = load <32 x i32>, <32 x i32>* %v109, align 128, !tbaa !3
  %v111 = add i32 %v105, %v30
  %v112 = shl nsw i32 %v111, 6
  %v113 = getelementptr inbounds i16, i16* %v2, i32 %v112
  %v114 = bitcast i16* %v113 to <32 x i32>*
  %v115 = load <32 x i32>, <32 x i32>* %v114, align 128, !tbaa !3
  %v116 = add i32 %v105, %v26
  %v117 = shl nsw i32 %v116, 6
  %v118 = getelementptr inbounds i16, i16* %v2, i32 %v117
  %v119 = bitcast i16* %v118 to <32 x i32>*
  %v120 = load <32 x i32>, <32 x i32>* %v119, align 128, !tbaa !3
  ; The odd-index store always writes %v45, the vector computed in b10.
  %v121 = shl nsw i32 %v105, 6
  %v122 = getelementptr inbounds i16, i16* %v37, i32 %v121
  %v123 = bitcast i16* %v122 to <32 x i32>*
  store <32 x i32> %v45, <32 x i32>* %v123, align 128, !tbaa !6
  ; Advance the index by 2, count %v50 down by 2, leave when it hits 0.
  %v124 = add nuw nsw i32 %v49, 2
  %v125 = add i32 %v50, -2
  %v126 = icmp eq i32 %v125, 0
  br i1 %v126, label %b12, label %b11

b12:                                              ; preds = %b11
  br label %b13

b13:                                              ; preds = %b12, %b9
  ; Tail after the inner loop: one more index, %v127, which is 0 when the
  ; loop was bypassed and otherwise the final %v124.
  %v127 = phi i32 [ 0, %b9 ], [ %v124, %b12 ]
  ; Three vector loads from %v2 using the bases %v33, %v30 and %v26.
  %v128 = add i32 %v127, %v33
  %v129 = shl nsw i32 %v128, 6
  %v130 = getelementptr inbounds i16, i16* %v2, i32 %v129
  %v131 = bitcast i16* %v130 to <32 x i32>*
  %v132 = load <32 x i32>, <32 x i32>* %v131, align 128, !tbaa !3
  %v133 = add i32 %v127, %v30
  %v134 = shl nsw i32 %v133, 6
  %v135 = getelementptr inbounds i16, i16* %v2, i32 %v134
  %v136 = bitcast i16* %v135 to <32 x i32>*
  %v137 = load <32 x i32>, <32 x i32>* %v136, align 128, !tbaa !3
  %v138 = add i32 %v127, %v26
  %v139 = shl nsw i32 %v138, 6
  %v140 = getelementptr inbounds i16, i16* %v2, i32 %v139
  %v141 = bitcast i16* %v140 to <32 x i32>*
  %v142 = load <32 x i32>, <32 x i32>* %v141, align 128, !tbaa !3
  ; Same valignb / vasrh / vaddh chains as in b11, with undef standing in for
  ; the operands that b11 takes from its phis.
  %v143 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v132, <32 x i32> undef, i32 92)
  %v144 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v143, i32 1) #3
  %v145 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v144, <32 x i32> %v34) #3
  %v146 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32> %v145, i32 393222) #3
  %v147 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v146)
  %v148 = tail call <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32> %v147, <32 x i32> undef, i32 2) #3
  %v149 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v148, <32 x i32> undef)
  %v150 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v137, <32 x i32> undef, i32 92)
  %v151 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v150, i32 1) #3
  %v152 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v151, <32 x i32> %v34) #3
  %v153 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v142, <32 x i32> undef, i32 92)
  %v154 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v153, i32 1) #3
  %v155 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v154, <32 x i32> %v34) #3
  %v156 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v152, <32 x i32> %v155) #3
  %v157 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> %v156) #3
  %v158 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v149, <64 x i32> %v157) #3
  %v159 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v158, <64 x i32> %v36) #3
  %v160 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v159)
  %v161 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v160, i32 4) #3
  %v162 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v161, <32 x i32> undef)
  %v163 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v162) #3
  %v164 = tail call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> %v163, <32 x i32> undef) #3
  ; The final store goes to %v37 at an undef element offset.
  %v165 = getelementptr inbounds i16, i16* %v37, i32 undef
  %v166 = bitcast i16* %v165 to <32 x i32>*
  store <32 x i32> %v164, <32 x i32>* %v166, align 128, !tbaa !6
  br label %b8
}

; #0 and #2 select the hexagonv60 CPU with the HVX v60 / 128-byte vector
; features; #2 (on @f2) additionally marks the function noreturn.
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
attributes #1 = { nounwind readnone }
attributes #2 = { noreturn nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
attributes #3 = { nounwind }

!llvm.module.flags = !{!0}

; !1 and !2 are branch weights that put all of the weight on one successor
; (the first for !1, the second for !2).
; !3 and !6 are TBAA access tags: !3 is attached to the loads from %v2, !6 to
; the stores through %v37. Their type nodes share the root !5.
!0 = !{i32 2, !"halide_mattrs", !"+hvxv60,+hvx-length128b"}
!1 = !{!"branch_weights", i32 1073741824, i32 0}
!2 = !{!"branch_weights", i32 0, i32 1073741824}
!3 = !{!4, !4, i64 0}
!4 = !{!"input_yuv", !5}
!5 = !{!"Halide buffer"}
!6 = !{!7, !7, i64 0}
!7 = !{!"blurred_ds_y", !5}