; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK-NOT: setbit(r{{[0-9]+}},#1)

target triple = "hexagon-unknown--elf"

%s.8 = type { i8*, i32, i32, i32, i32, %s.9*, %s.9*, %s.9* }
%s.9 = type { %s.10 }
%s.10 = type { i64 }
%s.4 = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] }

@g0 = private constant [6 x i8] c"input\00", align 32
@g1 = private constant [11 x i8] c"gaussian11\00", align 32
@g2 = private constant [2 x %s.8] [%s.8 { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @g0, i32 0, i32 0), i32 1, i32 2, i32 1, i32 8, %s.9* null, %s.9* null, %s.9* null }, %s.8 { i8* getelementptr inbounds ([11 x i8], [11 x i8]* @g1, i32 0, i32 0), i32 2, i32 2, i32 1, i32 8, %s.9* null, %s.9* null, %s.9* null }]
@g3 = private constant [53 x i8] c"hexagon-32-os_unknown-no_asserts-no_bounds_query-hvx\00", align 32

; Function Attrs: nounwind
declare i8* @f0(i8*, i32) #0

; Function Attrs: nounwind
declare void @f1(i8*, i8*) #0

; Function Attrs: nounwind
declare noalias i8* @f2(i8*, i32) #0

; Function Attrs: nounwind
declare void @f3(i8*, i8*) #0

; Function Attrs: nounwind
declare void @f4() #0

; Function Attrs: nounwind
declare void @f5() #0

; Function Attrs: nounwind
define i32 @f6(%s.4* noalias nocapture readonly %a0, %s.4* noalias nocapture readonly %a1) #0 {
b0:
  %v0 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 1
  %v1 = load i8*, i8** %v0
  %v2 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 2, i32 0
  %v3 = load i32, i32* %v2
  %v4 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 2, i32 1
  %v5 = load i32, i32* %v4
  %v6 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 3, i32 1
  %v7 = load i32, i32* %v6
  %v8 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 4, i32 0
  %v9 = load i32, i32* %v8
  %v10 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 4, i32 1
  %v11 = load i32, i32* %v10
  %v12 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 1
  %v13 = load i8*, i8** %v12
  %v14 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 2, i32 0
  %v15 = load i32, i32* %v14
  %v16 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 2, i32 1
  %v17 = load i32, i32* %v16
  %v18 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 3, i32 1
  %v19 = load i32, i32* %v18
  %v20 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 4, i32 0
  %v21 = load i32, i32* %v20
  %v22 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 4, i32 1
  %v23 = load i32, i32* %v22
  %v24 = add nsw i32 %v21, %v15
  %v25 = add nsw i32 %v24, -64
  %v26 = icmp slt i32 %v21, %v25
  %v27 = select i1 %v26, i32 %v21, i32 %v25
  %v28 = add nsw i32 %v15, -1
  %v29 = and i32 %v28, -64
  %v30 = add i32 %v21, 63
  %v31 = add i32 %v30, %v29
  %v32 = add nsw i32 %v24, -1
  %v33 = icmp slt i32 %v31, %v32
  %v34 = select i1 %v33, i32 %v31, i32 %v32
  %v35 = sub nsw i32 %v34, %v27
  %v36 = icmp slt i32 %v24, %v34
  %v37 = select i1 %v36, i32 %v34, i32 %v24
  %v38 = add nsw i32 %v37, -1
  %v39 = icmp slt i32 %v38, %v34
  %v40 = select i1 %v39, i32 %v34, i32 %v38
  %v41 = add nsw i32 %v17, 1
  %v42 = sext i32 %v41 to i64
  %v43 = sub nsw i32 %v40, %v27
  %v44 = add nsw i32 %v43, 2
  %v45 = sext i32 %v44 to i64
  %v46 = mul nsw i64 %v45, %v42
  %v47 = trunc i64 %v46 to i32
  %v48 = tail call i8* @f2(i8* null, i32 %v47)
  %v49 = add nsw i32 %v23, -1
  %v50 = add i32 %v23, %v17
  %v51 = icmp sgt i32 %v23, %v50
  br i1 %v51, label %b12, label %b1, !prof !3

b1: ; preds = %b11, %b0
  %v52 = phi i32 [ %v220, %b11 ], [ %v49, %b0 ]
  %v53 = icmp slt i32 %v9, %v24
  %v54 = select i1 %v53, i32 %v9, i32 %v24
  %v55 = add nsw i32 %v21, -1
  %v56 = icmp slt i32 %v54, %v55
  %v57 = select i1 %v56, i32 %v55, i32 %v54
  %v58 = add nsw i32 %v9, %v3
  %v59 = icmp slt i32 %v58, %v24
  %v60 = select i1 %v59, i32 %v58, i32 %v24
  %v61 = icmp slt i32 %v60, %v57
  %v62 = select i1 %v61, i32 %v57, i32 %v60
  %v63 = icmp slt i32 %v57, %v21
  br i1 %v63, label %b7, label %b2, !prof !3

b2: ; preds = %b1
  %v64 = add nsw i32 %v11, %v5
  %v65 = add nsw i32 %v64, -1
  %v66 = icmp slt i32 %v52, %v65
  br i1 %v66, label %b3, label %b4

b3: ; preds = %b3, %b2
  %v67 = phi i32 [ %v96, %b3 ], [ %v55, %b2 ]
  %v68 = mul nsw i32 %v11, %v7
  %v69 = icmp slt i32 %v52, %v11
  %v70 = select i1 %v69, i32 %v11, i32 %v52
  %v71 = mul nsw i32 %v70, %v7
  %v72 = add nsw i32 %v58, -1
  %v73 = icmp slt i32 %v67, %v72
  %v74 = select i1 %v73, i32 %v67, i32 %v72
  %v75 = icmp slt i32 %v74, %v9
  %v76 = select i1 %v75, i32 %v9, i32 %v74
  %v77 = add i32 %v68, %v9
  %v78 = sub i32 %v71, %v77
  %v79 = add i32 %v78, %v76
  %v80 = getelementptr inbounds i8, i8* %v1, i32 %v79
  %v81 = load i8, i8* %v80, align 1, !tbaa !4
  %v82 = icmp sle i32 %v64, %v52
  %v83 = icmp sle i32 %v58, %v67
  %v84 = icmp slt i32 %v67, %v9
  %v85 = or i1 %v84, %v83
  %v86 = or i1 %v69, %v85
  %v87 = or i1 %v82, %v86
  %v88 = select i1 %v87, i8 0, i8 %v81
  %v89 = sub i32 1, %v23
  %v90 = add i32 %v89, %v52
  %v91 = mul nsw i32 %v90, %v44
  %v92 = sub i32 1, %v27
  %v93 = add i32 %v92, %v91
  %v94 = add i32 %v93, %v67
  %v95 = getelementptr inbounds i8, i8* %v48, i32 %v94
  store i8 %v88, i8* %v95, align 1, !tbaa !7
  %v96 = add nsw i32 %v67, 1
  %v97 = icmp eq i32 %v96, %v57
  br i1 %v97, label %b7, label %b3

b4: ; preds = %b2
  %v98 = icmp slt i32 %v5, 1
  br i1 %v98, label %b5, label %b6

b5: ; preds = %b5, %b4
  %v99 = phi i32 [ %v123, %b5 ], [ %v55, %b4 ]
  %v100 = add nsw i32 %v58, -1
  %v101 = icmp slt i32 %v99, %v100
  %v102 = select i1 %v101, i32 %v99, i32 %v100
  %v103 = icmp slt i32 %v102, %v9
  %v104 = select i1 %v103, i32 %v9, i32 %v102
  %v105 = sub i32 %v104, %v9
  %v106 = getelementptr inbounds i8, i8* %v1, i32 %v105
  %v107 = load i8, i8* %v106, align 1, !tbaa !4
  %v108 = icmp sle i32 %v64, %v52
  %v109 = icmp slt i32 %v52, %v11
  %v110 = icmp sle i32 %v58, %v99
  %v111 = icmp slt i32 %v99, %v9
  %v112 = or i1 %v111, %v110
  %v113 = or i1 %v109, %v112
  %v114 = or i1 %v108, %v113
  %v115 = select i1 %v114, i8 0, i8 %v107
  %v116 = sub i32 1, %v23
  %v117 = add i32 %v116, %v52
  %v118 = mul nsw i32 %v117, %v44
  %v119 = sub i32 1, %v27
  %v120 = add i32 %v119, %v118
  %v121 = add i32 %v120, %v99
  %v122 = getelementptr inbounds i8, i8* %v48, i32 %v121
  store i8 %v115, i8* %v122, align 1, !tbaa !7
  %v123 = add nsw i32 %v99, 1
  %v124 = icmp eq i32 %v123, %v57
  br i1 %v124, label %b7, label %b5

b6: ; preds = %b6, %b4
  %v125 = phi i32 [ %v153, %b6 ], [ %v55, %b4 ]
  %v126 = mul nsw i32 %v11, %v7
  %v127 = mul nsw i32 %v65, %v7
  %v128 = add nsw i32 %v58, -1
  %v129 = icmp slt i32 %v125, %v128
  %v130 = select i1 %v129, i32 %v125, i32 %v128
  %v131 = icmp slt i32 %v130, %v9
  %v132 = select i1 %v131, i32 %v9, i32 %v130
  %v133 = add i32 %v126, %v9
  %v134 = sub i32 %v127, %v133
  %v135 = add i32 %v134, %v132
  %v136 = getelementptr inbounds i8, i8* %v1, i32 %v135
  %v137 = load i8, i8* %v136, align 1, !tbaa !4
  %v138 = icmp sle i32 %v64, %v52
  %v139 = icmp slt i32 %v52, %v11
  %v140 = icmp sle i32 %v58, %v125
  %v141 = icmp slt i32 %v125, %v9
  %v142 = or i1 %v141, %v140
  %v143 = or i1 %v139, %v142
  %v144 = or i1 %v138, %v143
  %v145 = select i1 %v144, i8 0, i8 %v137
  %v146 = sub i32 1, %v23
  %v147 = add i32 %v146, %v52
  %v148 = mul nsw i32 %v147, %v44
  %v149 = sub i32 1, %v27
  %v150 = add i32 %v149, %v148
  %v151 = add i32 %v150, %v125
  %v152 = getelementptr inbounds i8, i8* %v48, i32 %v151
  store i8 %v145, i8* %v152, align 1, !tbaa !7
  %v153 = add nsw i32 %v125, 1
  %v154 = icmp eq i32 %v153, %v57
  br i1 %v154, label %b7, label %b6

b7: ; preds = %b6, %b5, %b3, %b1
  %v155 = icmp slt i32 %v57, %v62
  br i1 %v155, label %b8, label %b9, !prof !9

b8: ; preds = %b8, %b7
  %v156 = phi i32 [ %v181, %b8 ], [ %v57, %b7 ]
  %v157 = mul nsw i32 %v11, %v7
  %v158 = add nsw i32 %v11, %v5
  %v159 = add nsw i32 %v158, -1
  %v160 = icmp slt i32 %v52, %v159
  %v161 = select i1 %v160, i32 %v52, i32 %v159
  %v162 = icmp slt i32 %v161, %v11
  %v163 = select i1 %v162, i32 %v11, i32 %v161
  %v164 = mul nsw i32 %v163, %v7
  %v165 = add i32 %v157, %v9
  %v166 = sub i32 %v164, %v165
  %v167 = add i32 %v166, %v156
  %v168 = getelementptr inbounds i8, i8* %v1, i32 %v167
  %v169 = load i8, i8* %v168, align 1, !tbaa !4
  %v170 = icmp sle i32 %v158, %v52
  %v171 = icmp slt i32 %v52, %v11
  %v172 = or i1 %v171, %v170
  %v173 = select i1 %v172, i8 0, i8 %v169
  %v174 = sub i32 1, %v23
  %v175 = add i32 %v174, %v52
  %v176 = mul nsw i32 %v175, %v44
  %v177 = sub i32 1, %v27
  %v178 = add i32 %v177, %v176
  %v179 = add i32 %v178, %v156
  %v180 = getelementptr inbounds i8, i8* %v48, i32 %v179
  store i8 %v173, i8* %v180, align 1, !tbaa !7
  %v181 = add nsw i32 %v156, 1
  %v182 = icmp eq i32 %v181, %v62
  br i1 %v182, label %b9, label %b8

b9: ; preds = %b8, %b7
  %v183 = icmp slt i32 %v62, %v24
  br i1 %v183, label %b10, label %b11, !prof !9

b10: ; preds = %b10, %b9
  %v184 = phi i32 [ %v218, %b10 ], [ %v62, %b9 ]
  %v185 = mul nsw i32 %v11, %v7
  %v186 = add nsw i32 %v11, %v5
  %v187 = add nsw i32 %v186, -1
  %v188 = icmp slt i32 %v52, %v187
  %v189 = select i1 %v188, i32 %v52, i32 %v187
  %v190 = icmp slt i32 %v189, %v11
  %v191 = select i1 %v190, i32 %v11, i32 %v189
  %v192 = mul nsw i32 %v191, %v7
  %v193 = add nsw i32 %v58, -1
  %v194 = icmp slt i32 %v184, %v193
  %v195 = select i1 %v194, i32 %v184, i32 %v193
  %v196 = icmp slt i32 %v195, %v9
  %v197 = select i1 %v196, i32 %v9, i32 %v195
  %v198 = add i32 %v185, %v9
  %v199 = sub i32 %v192, %v198
  %v200 = add i32 %v199, %v197
  %v201 = getelementptr inbounds i8, i8* %v1, i32 %v200
  %v202 = load i8, i8* %v201, align 1, !tbaa !4
  %v203 = icmp sle i32 %v186, %v52
  %v204 = icmp slt i32 %v52, %v11
  %v205 = icmp sle i32 %v58, %v184
  %v206 = icmp slt i32 %v184, %v9
  %v207 = or i1 %v206, %v205
  %v208 = or i1 %v204, %v207
  %v209 = or i1 %v203, %v208
  %v210 = select i1 %v209, i8 0, i8 %v202
  %v211 = sub i32 1, %v23
  %v212 = add i32 %v211, %v52
  %v213 = mul nsw i32 %v212, %v44
  %v214 = sub i32 1, %v27
  %v215 = add i32 %v214, %v213
  %v216 = add i32 %v215, %v184
  %v217 = getelementptr inbounds i8, i8* %v48, i32 %v216
  store i8 %v210, i8* %v217, align 1, !tbaa !7
  %v218 = add nsw i32 %v184, 1
  %v219 = icmp eq i32 %v218, %v24
  br i1 %v219, label %b11, label %b10

b11: ; preds = %b10, %b9
  %v220 = add nsw i32 %v52, 1
  %v221 = icmp eq i32 %v220, %v50
  br i1 %v221, label %b12, label %b1

b12: ; preds = %b11, %b0
  %v222 = add nsw i32 %v35, 1
  %v223 = sext i32 %v222 to i64
  %v224 = shl nsw i64 %v42, 2
  %v225 = mul i64 %v224, %v223
  %v226 = trunc i64 %v225 to i32
  %v227 = tail call i8* @f2(i8* null, i32 %v226)
  br i1 %v51, label %b14, label %b13, !prof !3

b13: ; preds = %b19, %b12
  %v228 = phi i32 [ %v351, %b19 ], [ %v49, %b12 ]
  %v229 = ashr i32 %v15, 6
  %v230 = icmp slt i32 %v229, 0
  %v231 = select i1 %v230, i32 0, i32 %v229
  %v232 = icmp sgt i32 %v231, 0
  br i1 %v232, label %b16, label %b17, !prof !9

b14: ; preds = %b19, %b12
  %v233 = icmp eq i8* %v48, null
  br i1 %v233, label %b20, label %b15

b15: ; preds = %b14
  tail call void @f3(i8* null, i8* %v48) #2
  br label %b20

b16: ; preds = %b16, %b13
  %v234 = phi i32 [ %v289, %b16 ], [ 0, %b13 ]
  %v235 = sub nsw i32 %v228, %v23
  %v236 = add nsw i32 %v235, 1
  %v237 = mul nsw i32 %v236, %v44
  %v238 = shl i32 %v234, 6
  %v239 = sub i32 %v21, %v27
  %v240 = add i32 %v239, %v238
  %v241 = add nsw i32 %v240, %v237
  %v242 = getelementptr inbounds i8, i8* %v48, i32 %v241
  %v243 = bitcast i8* %v242 to <16 x i32>*
  %v244 = load <16 x i32>, <16 x i32>* %v243, align 1, !tbaa !7
  %v245 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v244)
  %v246 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v245)
  %v247 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v245)
  %v248 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v247)
  %v249 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v246)
  %v250 = add nsw i32 %v241, 1
  %v251 = getelementptr inbounds i8, i8* %v48, i32 %v250
  %v252 = bitcast i8* %v251 to <16 x i32>*
  %v253 = load <16 x i32>, <16 x i32>* %v252, align 1, !tbaa !7
  %v254 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v253)
  %v255 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v254)
  %v256 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v255)
  %v257 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v256)
  %v258 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v256)
  %v259 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v257, i32 168430090)
  %v260 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v258, i32 168430090)
  %v261 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v259, <16 x i32> %v260)
  %v262 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v254)
  %v263 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v262)
  %v264 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v263)
  %v265 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v263)
  %v266 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v264, i32 168430090)
  %v267 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v265, i32 168430090)
  %v268 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v266, <16 x i32> %v267)
  %v269 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v248, <32 x i32> %v261)
  %v270 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v249, <32 x i32> %v268)
  %v271 = shufflevector <32 x i32> %v269, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v272 = mul nsw i32 %v236, %v222
  %v273 = add nsw i32 %v240, %v272
  %v274 = bitcast i8* %v227 to i32*
  %v275 = getelementptr inbounds i32, i32* %v274, i32 %v273
  %v276 = bitcast i32* %v275 to <16 x i32>*
  store <16 x i32> %v271, <16 x i32>* %v276, align 4, !tbaa !10
  %v277 = shufflevector <32 x i32> %v269, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v278 = add nsw i32 %v273, 16
  %v279 = getelementptr inbounds i32, i32* %v274, i32 %v278
  %v280 = bitcast i32* %v279 to <16 x i32>*
  store <16 x i32> %v277, <16 x i32>* %v280, align 4, !tbaa !10
  %v281 = shufflevector <32 x i32> %v270, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v282 = add nsw i32 %v273, 32
  %v283 = getelementptr inbounds i32, i32* %v274, i32 %v282
  %v284 = bitcast i32* %v283 to <16 x i32>*
  store <16 x i32> %v281, <16 x i32>* %v284, align 4, !tbaa !10
  %v285 = shufflevector <32 x i32> %v270, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v286 = add nsw i32 %v273, 48
  %v287 = getelementptr inbounds i32, i32* %v274, i32 %v286
  %v288 = bitcast i32* %v287 to <16 x i32>*
  store <16 x i32> %v285, <16 x i32>* %v288, align 4, !tbaa !10
  %v289 = add nuw nsw i32 %v234, 1
  %v290 = icmp eq i32 %v289, %v231
  br i1 %v290, label %b17, label %b16

b17: ; preds = %b16, %b13
  %v291 = add nsw i32 %v15, 63
  %v292 = ashr i32 %v291, 6
  %v293 = icmp slt i32 %v231, %v292
  br i1 %v293, label %b18, label %b19, !prof !9

b18: ; preds = %b18, %b17
  %v294 = phi i32 [ %v349, %b18 ], [ %v231, %b17 ]
  %v295 = sub nsw i32 %v228, %v23
  %v296 = add nsw i32 %v295, 1
  %v297 = mul nsw i32 %v296, %v44
  %v298 = sub nsw i32 %v24, %v27
  %v299 = add nsw i32 %v297, %v298
  %v300 = add nsw i32 %v299, -64
  %v301 = getelementptr inbounds i8, i8* %v48, i32 %v300
  %v302 = bitcast i8* %v301 to <16 x i32>*
  %v303 = load <16 x i32>, <16 x i32>* %v302, align 1, !tbaa !7
  %v304 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v303)
  %v305 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v304)
  %v306 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v304)
  %v307 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v306)
  %v308 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v305)
  %v309 = add nsw i32 %v299, -63
  %v310 = getelementptr inbounds i8, i8* %v48, i32 %v309
  %v311 = bitcast i8* %v310 to <16 x i32>*
  %v312 = load <16 x i32>, <16 x i32>* %v311, align 1, !tbaa !7
  %v313 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v312)
  %v314 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v313)
  %v315 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v314)
  %v316 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v315)
  %v317 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v315)
  %v318 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v316, i32 168430090)
  %v319 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v317, i32 168430090)
  %v320 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v318, <16 x i32> %v319)
  %v321 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v313)
  %v322 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v321)
  %v323 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v322)
  %v324 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v322)
  %v325 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v323, i32 168430090)
  %v326 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v324, i32 168430090)
  %v327 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v325, <16 x i32> %v326)
  %v328 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v307, <32 x i32> %v320)
  %v329 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v308, <32 x i32> %v327)
  %v330 = shufflevector <32 x i32> %v328, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v331 = mul nsw i32 %v296, %v222
  %v332 = add nsw i32 %v331, %v298
  %v333 = add nsw i32 %v332, -64
  %v334 = bitcast i8* %v227 to i32*
  %v335 = getelementptr inbounds i32, i32* %v334, i32 %v333
  %v336 = bitcast i32* %v335 to <16 x i32>*
  store <16 x i32> %v330, <16 x i32>* %v336, align 4, !tbaa !10
  %v337 = shufflevector <32 x i32> %v328, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v338 = add nsw i32 %v332, -48
  %v339 = getelementptr inbounds i32, i32* %v334, i32 %v338
  %v340 = bitcast i32* %v339 to <16 x i32>*
  store <16 x i32> %v337, <16 x i32>* %v340, align 4, !tbaa !10
  %v341 = shufflevector <32 x i32> %v329, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v342 = add nsw i32 %v332, -32
  %v343 = getelementptr inbounds i32, i32* %v334, i32 %v342
  %v344 = bitcast i32* %v343 to <16 x i32>*
  store <16 x i32> %v341, <16 x i32>* %v344, align 4, !tbaa !10
  %v345 = shufflevector <32 x i32> %v329, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v346 = add nsw i32 %v332, -16
  %v347 = getelementptr inbounds i32, i32* %v334, i32 %v346
  %v348 = bitcast i32* %v347 to <16 x i32>*
  store <16 x i32> %v345, <16 x i32>* %v348, align 4, !tbaa !10
  %v349 = add nuw nsw i32 %v294, 1
  %v350 = icmp eq i32 %v349, %v292
  br i1 %v350, label %b19, label %b18

b19: ; preds = %b18, %b17
  %v351 = add nsw i32 %v228, 1
  %v352 = icmp eq i32 %v351, %v50
  br i1 %v352, label %b14, label %b13

b20: ; preds = %b15, %b14
  %v353 = icmp sgt i32 %v17, 0
  br i1 %v353, label %b21, label %b31, !prof !9

b21: ; preds = %b20
  %v354 = ashr i32 %v15, 6
  %v355 = icmp slt i32 %v354, 0
  %v356 = select i1 %v355, i32 0, i32 %v354
  %v357 = icmp sgt i32 %v356, 0
  br i1 %v357, label %b25, label %b27

b22: ; preds = %b25, %b22
  %v358 = phi i32 [ %v442, %b22 ], [ 0, %b25 ]
  %v359 = sub nsw i32 %v525, %v23
  %v360 = mul nsw i32 %v359, %v222
  %v361 = shl nsw i32 %v358, 6
  %v362 = add nsw i32 %v361, %v21
  %v363 = sub nsw i32 %v362, %v27
  %v364 = add nsw i32 %v363, %v360
  %v365 = bitcast i8* %v227 to i32*
  %v366 = getelementptr inbounds i32, i32* %v365, i32 %v364
  %v367 = bitcast i32* %v366 to <16 x i32>*
  %v368 = load <16 x i32>, <16 x i32>* %v367, align 4, !tbaa !10
  %v369 = add nsw i32 %v364, 16
  %v370 = getelementptr inbounds i32, i32* %v365, i32 %v369
  %v371 = bitcast i32* %v370 to <16 x i32>*
  %v372 = load <16 x i32>, <16 x i32>* %v371, align 4, !tbaa !10
  %v373 = shufflevector <16 x i32> %v368, <16 x i32> %v372, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v374 = add nsw i32 %v359, 1
  %v375 = mul nsw i32 %v374, %v222
  %v376 = add nsw i32 %v363, %v375
  %v377 = getelementptr inbounds i32, i32* %v365, i32 %v376
  %v378 = bitcast i32* %v377 to <16 x i32>*
  %v379 = load <16 x i32>, <16 x i32>* %v378, align 4, !tbaa !10
  %v380 = add nsw i32 %v376, 16
  %v381 = getelementptr inbounds i32, i32* %v365, i32 %v380
  %v382 = bitcast i32* %v381 to <16 x i32>*
  %v383 = load <16 x i32>, <16 x i32>* %v382, align 4, !tbaa !10
  %v384 = shufflevector <16 x i32> %v379, <16 x i32> %v383, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v385 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v384)
  %v386 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v384)
  %v387 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v385, i32 168430090)
  %v388 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v386, i32 168430090)
  %v389 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v387, <16 x i32> %v388)
  %v390 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v373, <32 x i32> %v389)
  %v391 = shufflevector <32 x i32> %v390, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v392 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v391, i32 20)
  %v393 = shufflevector <32 x i32> %v390, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v394 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v393, i32 20)
  %v395 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v394, <16 x i32> %v392)
  %v396 = add nsw i32 %v364, 32
  %v397 = getelementptr inbounds i32, i32* %v365, i32 %v396
  %v398 = bitcast i32* %v397 to <16 x i32>*
  %v399 = load <16 x i32>, <16 x i32>* %v398, align 4, !tbaa !10
  %v400 = add nsw i32 %v364, 48
  %v401 = getelementptr inbounds i32, i32* %v365, i32 %v400
  %v402 = bitcast i32* %v401 to <16 x i32>*
  %v403 = load <16 x i32>, <16 x i32>* %v402, align 4, !tbaa !10
  %v404 = shufflevector <16 x i32> %v399, <16 x i32> %v403, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v405 = add nsw i32 %v376, 32
  %v406 = getelementptr inbounds i32, i32* %v365, i32 %v405
  %v407 = bitcast i32* %v406 to <16 x i32>*
  %v408 = load <16 x i32>, <16 x i32>* %v407, align 4, !tbaa !10
  %v409 = add nsw i32 %v376, 48
  %v410 = getelementptr inbounds i32, i32* %v365, i32 %v409
  %v411 = bitcast i32* %v410 to <16 x i32>*
  %v412 = load <16 x i32>, <16 x i32>* %v411, align 4, !tbaa !10
  %v413 = shufflevector <16 x i32> %v408, <16 x i32> %v412, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v414 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v413)
  %v415 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v413)
  %v416 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v414, i32 168430090)
  %v417 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v415, i32 168430090)
  %v418 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v416, <16 x i32> %v417)
  %v419 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v404, <32 x i32> %v418)
  %v420 = shufflevector <32 x i32> %v419, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v421 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v420, i32 20)
  %v422 = shufflevector <32 x i32> %v419, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v423 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v422, i32 20)
  %v424 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v423, <16 x i32> %v421)
  %v425 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v395)
  %v426 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v395)
  %v427 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v425, <16 x i32> %v426)
  %v428 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v424)
  %v429 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v424)
  %v430 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v428, <16 x i32> %v429)
  %v431 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v430, <16 x i32> %v427)
  %v432 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v431)
  %v433 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v431)
  %v434 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %v432, <16 x i32> %v433)
  %v435 = mul nsw i32 %v23, %v19
  %v436 = mul nsw i32 %v525, %v19
  %v437 = add i32 %v435, %v21
  %v438 = sub i32 %v436, %v437
  %v439 = add i32 %v438, %v362
  %v440 = getelementptr inbounds i8, i8* %v13, i32 %v439
  %v441 = bitcast i8* %v440 to <16 x i32>*
  store <16 x i32> %v434, <16 x i32>* %v441, align 1, !tbaa !12
  %v442 = add nuw nsw i32 %v358, 1
  %v443 = icmp eq i32 %v442, %v356
  br i1 %v443, label %b26, label %b22

b23: ; preds = %b26, %b23
  %v444 = phi i32 [ %v521, %b23 ], [ %v356, %b26 ]
  %v445 = sub nsw i32 %v24, %v27
  %v446 = add nsw i32 %v360, %v445
  %v447 = add nsw i32 %v446, -64
  %v448 = getelementptr inbounds i32, i32* %v365, i32 %v447
  %v449 = bitcast i32* %v448 to <16 x i32>*
  %v450 = load <16 x i32>, <16 x i32>* %v449, align 4, !tbaa !10
  %v451 = add nsw i32 %v446, -48
  %v452 = getelementptr inbounds i32, i32* %v365, i32 %v451
  %v453 = bitcast i32* %v452 to <16 x i32>*
  %v454 = load <16 x i32>, <16 x i32>* %v453, align 4, !tbaa !10
  %v455 = shufflevector <16 x i32> %v450, <16 x i32> %v454, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v456 = add nsw i32 %v375, %v445
  %v457 = add nsw i32 %v456, -64
  %v458 = getelementptr inbounds i32, i32* %v365, i32 %v457
  %v459 = bitcast i32* %v458 to <16 x i32>*
  %v460 = load <16 x i32>, <16 x i32>* %v459, align 4, !tbaa !10
  %v461 = add nsw i32 %v456, -48
  %v462 = getelementptr inbounds i32, i32* %v365, i32 %v461
  %v463 = bitcast i32* %v462 to <16 x i32>*
  %v464 = load <16 x i32>, <16 x i32>* %v463, align 4, !tbaa !10
  %v465 = shufflevector <16 x i32> %v460, <16 x i32> %v464, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v466 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v465)
  %v467 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v465)
  %v468 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v466, i32 168430090)
  %v469 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v467, i32 168430090)
  %v470 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v468, <16 x i32> %v469)
  %v471 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v455, <32 x i32> %v470)
  %v472 = shufflevector <32 x i32> %v471, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v473 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v472, i32 20)
  %v474 = shufflevector <32 x i32> %v471, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v475 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v474, i32 20)
  %v476 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v475, <16 x i32> %v473)
  %v477 = add nsw i32 %v446, -32
  %v478 = getelementptr inbounds i32, i32* %v365, i32 %v477
  %v479 = bitcast i32* %v478 to <16 x i32>*
  %v480 = load <16 x i32>, <16 x i32>* %v479, align 4, !tbaa !10
  %v481 = add nsw i32 %v446, -16
  %v482 = getelementptr inbounds i32, i32* %v365, i32 %v481
  %v483 = bitcast i32* %v482 to <16 x i32>*
  %v484 = load <16 x i32>, <16 x i32>* %v483, align 4, !tbaa !10
  %v485 = shufflevector <16 x i32> %v480, <16 x i32> %v484, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v486 = add nsw i32 %v456, -32
  %v487 = getelementptr inbounds i32, i32* %v365, i32 %v486
  %v488 = bitcast i32* %v487 to <16 x i32>*
  %v489 = load <16 x i32>, <16 x i32>* %v488, align 4, !tbaa !10
  %v490 = add nsw i32 %v456, -16
  %v491 = getelementptr inbounds i32, i32* %v365, i32 %v490
  %v492 = bitcast i32* %v491 to <16 x i32>*
  %v493 = load <16 x i32>, <16 x i32>* %v492, align 4, !tbaa !10
  %v494 = shufflevector <16 x i32> %v489, <16 x i32> %v493, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v495 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v494)
  %v496 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v494)
  %v497 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v495, i32 168430090)
  %v498 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v496, i32 168430090)
  %v499 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v497, <16 x i32> %v498)
  %v500 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v485, <32 x i32> %v499)
  %v501 = shufflevector <32 x i32> %v500, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v502 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v501, i32 20)
  %v503 = shufflevector <32 x i32> %v500, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v504 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v503, i32 20)
  %v505 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v504, <16 x i32> %v502)
  %v506 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v476)
  %v507 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v476)
  %v508 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v506, <16 x i32> %v507)
  %v509 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v505)
  %v510 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v505)
  %v511 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v509, <16 x i32> %v510)
  %v512 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v511, <16 x i32> %v508)
  %v513 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v512)
  %v514 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v512)
  %v515 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %v513, <16 x i32> %v514)
  %v516 = add i32 %v15, -64
  %v517 = sub i32 %v516, %v435
  %v518 = add i32 %v517, %v436
  %v519 = getelementptr inbounds i8, i8* %v13, i32 %v518
  %v520 = bitcast i8* %v519 to <16 x i32>*
  store <16 x i32> %v515, <16 x i32>* %v520, align 1, !tbaa !12
  %v521 = add nuw nsw i32 %v444, 1
  %v522 = icmp eq i32 %v521, %v527
  br i1 %v522, label %b24, label %b23

b24: ; preds = %b26, %b23
  %v523 = add nsw i32 %v525, 1
  %v524 = icmp eq i32 %v523, %v50
  br i1 %v524, label %b32, label %b25

b25: ; preds = %b24, %b21
  %v525 = phi i32 [ %v523, %b24 ], [ %v23, %b21 ]
  br label %b22

b26: ; preds = %b22
  %v526 = add nsw i32 %v15, 63
  %v527 = ashr i32 %v526, 6
  %v528 = icmp slt i32 %v356, %v527
  br i1 %v528, label %b23, label %b24, !prof !9

b27: ; preds = %b21
  %v529 = add nsw i32 %v15, 63
  %v530 = ashr i32 %v529, 6
  %v531 = icmp slt i32 %v356, %v530
  br i1 %v531, label %b29, label %b31

b28: ; preds = %b29, %b28
  %v532 = phi i32 [ %v616, %b28 ], [ %v356, %b29 ]
  %v533 = sub nsw i32 %v618, %v23
  %v534 = mul nsw i32 %v533, %v222
  %v535 = sub nsw i32 %v24, %v27
  %v536 = add nsw i32 %v534, %v535
  %v537 = add nsw i32 %v536, -64
  %v538 = bitcast i8* %v227 to i32*
  %v539 = getelementptr inbounds i32, i32* %v538, i32 %v537
  %v540 = bitcast i32* %v539 to <16 x i32>*
  %v541 = load <16 x i32>, <16 x i32>* %v540, align 4, !tbaa !10
  %v542 = add nsw i32 %v536, -48
  %v543 = getelementptr inbounds i32, i32* %v538, i32 %v542
  %v544 = bitcast i32* %v543 to <16 x i32>*
  %v545 = load <16 x i32>, <16 x i32>* %v544, align 4, !tbaa !10
  %v546 = shufflevector <16 x i32> %v541, <16 x i32> %v545, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v547 = add nsw i32 %v533, 1
  %v548 = mul nsw i32 %v547, %v222
  %v549 = add nsw i32 %v548, %v535
  %v550 = add nsw i32 %v549, -64
  %v551 = getelementptr inbounds i32, i32* %v538, i32 %v550
  %v552 = bitcast i32* %v551 to <16 x i32>*
  %v553 = load <16 x i32>, <16 x i32>* %v552, align 4, !tbaa !10
  %v554 = add nsw i32 %v549, -48
  %v555 = getelementptr inbounds i32, i32* %v538, i32 %v554
  %v556 = bitcast i32* %v555 to <16 x i32>*
  %v557 = load <16 x i32>, <16 x i32>* %v556, align 4, !tbaa !10
  %v558 = shufflevector <16 x i32> %v553, <16 x i32> %v557, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v559 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v558)
  %v560 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v558)
  %v561 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v559, i32 168430090)
  %v562 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v560, i32 168430090)
  %v563 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v561, <16 x i32> %v562)
  %v564 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v546, <32 x i32> %v563)
  %v565 = shufflevector <32 x i32> %v564, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v566 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v565, i32 20)
  %v567 = shufflevector <32 x i32> %v564, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v568 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v567, i32 20)
  %v569 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v568, <16 x i32> %v566)
  %v570 = add nsw i32 %v536, -32
  %v571 = getelementptr inbounds i32, i32* %v538, i32 %v570
  %v572 = bitcast i32* %v571 to <16 x i32>*
  %v573 = load <16 x i32>, <16 x i32>* %v572, align 4, !tbaa !10
  %v574 = add nsw i32 %v536, -16
  %v575 = getelementptr inbounds i32, i32* %v538, i32 %v574
  %v576 = bitcast i32* %v575 to <16 x i32>*
  %v577 = load <16 x i32>, <16 x i32>* %v576, align 4, !tbaa !10
  %v578 = shufflevector <16 x i32> %v573, <16 x i32> %v577, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v579 = add nsw i32 %v549, -32
  %v580 = getelementptr inbounds i32, i32* %v538, i32 %v579
  %v581 = bitcast i32* %v580 to <16 x i32>*
  %v582 = load <16 x i32>, <16 x i32>* %v581, align 4, !tbaa !10
  %v583 = add nsw i32 %v549, -16
  %v584 = getelementptr inbounds i32, i32* %v538, i32 %v583
  %v585 = bitcast i32* %v584 to <16 x i32>*
  %v586 = load <16 x i32>, <16 x i32>* %v585, align 4, !tbaa !10
  %v587 = shufflevector <16 x i32> %v582, <16 x i32> %v586, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v588 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v587)
  %v589 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v587)
  %v590 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v588, i32 168430090)
  %v591 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v589, i32 168430090)
  %v592 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v590, <16 x i32> %v591)
  %v593 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v578, <32 x i32> %v592)
  %v594 = shufflevector <32 x i32> %v593, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v595 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v594, i32 20)
  %v596 = shufflevector <32 x i32> %v593, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v597 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v596, i32 20)
  %v598 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v597, <16 x i32> %v595)
  %v599 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v569)
  %v600 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v569)
  %v601 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v599, <16 x i32> %v600)
  %v602 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v598)
  %v603 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v598)
  %v604 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v602, <16 x i32> %v603)
  %v605 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v604, <16 x i32> %v601)
  %v606 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v605)
  %v607 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v605)
  %v608 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %v606, <16 x i32> %v607)
  %v609 = mul nsw i32 %v23, %v19
  %v610 = mul nsw i32 %v618, %v19
  %v611 = add i32 %v15, -64
  %v612 = sub i32 %v611, %v609
  %v613 = add i32 %v612, %v610
  %v614 = getelementptr inbounds i8, i8* %v13, i32 %v613
  %v615 = bitcast i8* %v614 to <16 x i32>*
  store <16 x i32> %v608, <16 x i32>* %v615, align 1, !tbaa !12
  %v616 = add nuw nsw i32 %v532, 1
  %v617 = icmp eq i32 %v616, %v530
  br i1 %v617, label %b30, label %b28

b29: ; preds = %b30, %b27
  %v618 = phi i32 [ %v619, %b30 ], [ %v23, %b27 ]
  br label %b28

b30: ; preds = %b28
  %v619 = add nsw i32 %v618, 1
  %v620 = icmp eq i32 %v619, %v50
  br i1 %v620, label %b32, label %b29

b31: ; preds = %b27, %b20
  %v621 = icmp eq i8* %v227, null
  br i1 %v621, label %b33, label %b32

b32: ; preds = %b31, %b30, %b24
  tail call void @f3(i8* null, i8* %v227) #2
  br label %b33

b33: ; preds = %b32, %b31
  ret i32 0
}

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
attributes #2 = { nobuiltin nounwind }

!llvm.module.flags = !{!0, !1, !2}

!0 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
!1 = !{i32 2, !"halide_mcpu", !"hexagonv60"}
!2 = !{i32 2, !"halide_mattrs", !"+hvx"}
!3 = !{!"branch_weights", i32 0, i32 1073741824}
!4 = !{!5, !5, i64 0}
!5 = !{!"input", !6}
!6 = !{!"Halide buffer"}
!7 = !{!8, !8, i64 0}
!8 = !{!"constant_exterior", !6}
!9 = !{!"branch_weights", i32 1073741824, i32 0}
!10 = !{!11, !11, i64 0}
!11 = !{!"rows", !6}
!12 = !{!13, !13, i64 0}
!13 = !{!"gaussian11", !6}