; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine %s | FileCheck %s

; One extract/insert pair: lane 5 of %in becomes lane 0 of a <1 x i8>.
; The expected output is a single shufflevector.

define <1 x i8> @test1(<8 x i8> %in) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <8 x i8> %in, <8 x i8> undef, <1 x i32> <i32 5>
; CHECK-NEXT: ret <1 x i8> [[VEC]]
;
  %val = extractelement <8 x i8> %in, i32 5
  %vec = insertelement <1 x i8> undef, i8 %val, i32 0
  ret <1 x i8> %vec
}

; Four lanes gathered from two same-width sources (%in lanes 3, 1, 2 and
; %in2 lane 0). The expected output is one two-operand shufflevector.

define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[VEC_3:%.*]] = shufflevector <8 x i16> %in2, <8 x i16> %in, <4 x i32> <i32 11, i32 9, i32 0, i32 10>
; CHECK-NEXT: ret <4 x i16> [[VEC_3]]
;
  %elt0 = extractelement <8 x i16> %in, i32 3
  %elt1 = extractelement <8 x i16> %in, i32 1
  %elt2 = extractelement <8 x i16> %in2, i32 0
  %elt3 = extractelement <8 x i16> %in, i32 2

  %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0
  %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1
  %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2
  %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3

  ret <4 x i16> %vec.3
}

; The extract source (<1 x i64>) is narrower than the insert destination
; (<2 x i64>). The expected output first widens %b with a shuffle, then blends
; it into %a with a second shuffle.

define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: @test_vcopyq_lane_p64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> %a, <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %elt = extractelement <1 x i64> %b, i32 0
  %res = insertelement <2 x i64> %a, i64 %elt, i32 1
  ret <2 x i64> %res
}

; PR2109: https://llvm.org/bugs/show_bug.cgi?id=2109

; Both lanes of a <2 x float> are inserted into lanes 1 and 3 of %ins.

define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) {
; CHECK-LABEL: @widen_extract2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I2:%.*]] = shufflevector <4 x float> %ins, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 5>
; CHECK-NEXT: ret <4 x float> [[I2]]
;
  %e1 = extractelement <2 x float> %ext, i32 0
  %e2 = extractelement <2 x float> %ext, i32 1
  %i1 = insertelement <4 x float> %ins, float %e1, i32 1
  %i2 = insertelement <4 x float> %i1, float %e2, i32 3
  ret <4 x float> %i2
}

; All three lanes of a <3 x float> are inserted in reverse order into lanes
; 2, 1 and 0 of %ins; lane 3 of %ins is kept.

define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) {
; CHECK-LABEL: @widen_extract3(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> %ext, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[I3:%.*]] = shufflevector <4 x float> %ins, <4 x float> [[TMP1]], <4 x i32> <i32 6, i32 5, i32 4, i32 3>
; CHECK-NEXT: ret <4 x float> [[I3]]
;
  %e1 = extractelement <3 x float> %ext, i32 0
  %e2 = extractelement <3 x float> %ext, i32 1
  %e3 = extractelement <3 x float> %ext, i32 2
  %i1 = insertelement <4 x float> %ins, float %e1, i32 2
  %i2 = insertelement <4 x float> %i1, float %e2, i32 1
  %i3 = insertelement <4 x float> %i2, float %e3, i32 0
  ret <4 x float> %i3
}

; Only lane 0 of a <2 x float> is used, and the destination is four times as
; wide (<8 x float>).

define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
; CHECK-LABEL: @widen_extract4(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[I1:%.*]] = shufflevector <8 x float> %ins, <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x float> [[I1]]
;
  %e1 = extractelement <2 x float> %ext, i32 0
  %i1 = insertelement <8 x float> %ins, float %e1, i32 2
  ret <8 x float> %i1
}

; PR26015: https://llvm.org/bugs/show_bug.cgi?id=26015
; The widening shuffle must be inserted before any uses.
; Lanes 2 and 3 of %t0 are inserted into lanes 3 and 7 of a zero vector, with
; an unrelated constant insert (lane 6) between the two.

define <8 x i16> @pr26015(<4 x i16> %t0) {
; CHECK-LABEL: @pr26015(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: ret <8 x i16> [[T5]]
;
  %t1 = extractelement <4 x i16> %t0, i32 2
  %t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
  %t3 = insertelement <8 x i16> %t2, i16 0, i32 6
  %t4 = extractelement <4 x i16> %t0, i32 3
  %t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
  ret <8 x i16> %t5
}

; PR25999: https://llvm.org/bugs/show_bug.cgi?id=25999
; TODO: The widening shuffle could be inserted at the start of the function to allow the first extract to use it.

; %t1 is extracted in the entry block and used in both successors. The
; expected output keeps it as an extractelement and creates the widening
; shuffle in the 'if' block, where only the lane-3 extract is folded.

define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) {
; CHECK-LABEL: @pr25999(
; CHECK-NEXT: [[T1:%.*]] = extractelement <4 x i16> %t0, i32 2
; CHECK-NEXT: br i1 %b, label %if, label %end
; CHECK: if:
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 [[T1]], i32 3
; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: ret <8 x i16> [[T5]]
; CHECK: end:
; CHECK-NEXT: [[A1:%.*]] = add i16 [[T1]], 4
; CHECK-NEXT: [[T6:%.*]] = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 [[A1]], i32 0
; CHECK-NEXT: ret <8 x i16> [[T6]]
;

  %t1 = extractelement <4 x i16> %t0, i32 2
  br i1 %b, label %if, label %end

if:
  %t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
  %t3 = insertelement <8 x i16> %t2, i16 0, i32 6
  %t4 = extractelement <4 x i16> %t0, i32 3
  %t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
  ret <8 x i16> %t5

end:
  %a1 = add i16 %t1, 4
  %t6 = insertelement <8 x i16> zeroinitializer, i16 %a1, i32 0
  ret <8 x i16> %t6
}

; The widening shuffle must be inserted at a valid point (after the PHIs).

; Here the extract source is itself a PHI, and a second PHI follows it in bb3.

define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @pr25999_phis1(
; CHECK-NEXT: bb1:
; CHECK-NEXT: br i1 %c, label %bb2, label %bb3
; CHECK: bb2:
; CHECK-NEXT: [[R:%.*]] = call <2 x double> @dummy(<2 x double> %a)
; CHECK-NEXT: br label %bb3
; CHECK: bb3:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ %a, %bb1 ], [ [[R]], %bb2 ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x double> [[TMP4]]
;
bb1:
  br i1 %c, label %bb2, label %bb3

bb2:
  %r = call <2 x double> @dummy(<2 x double> %a)
  br label %bb3

bb3:
  %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
  %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
  ret <4 x double> %tmp4
}

; External callee used by the PHI tests to produce a non-constant incoming value.
declare <2 x double> @dummy(<2 x double>)

; Here the extract source is an fadd defined after the PHIs in bb3. The
; expected widening shuffle follows the fadd.

define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @pr25999_phis2(
; CHECK-NEXT: bb1:
; CHECK-NEXT: br i1 %c, label %bb2, label %bb3
; CHECK: bb2:
; CHECK-NEXT: [[R:%.*]] = call <2 x double> @dummy(<2 x double> %a)
; CHECK-NEXT: br label %bb3
; CHECK: bb3:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ %a, %bb1 ], [ [[R]], %bb2 ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
; CHECK-NEXT: [[D:%.*]] = fadd <2 x double> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x double> [[TMP4]]
;
bb1:
  br i1 %c, label %bb2, label %bb3

bb2:
  %r = call <2 x double> @dummy(<2 x double> %a)
  br label %bb3

bb3:
  %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
  %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
  %d = fadd <2 x double> %tmp1, %tmp1
  %tmp3 = extractelement <2 x double> %d, i32 0
  %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
  ret <4 x double> %tmp4
}

; PR26354: https://llvm.org/bugs/show_bug.cgi?id=26354
; Don't create a shufflevector if we know that we're not going to replace the insertelement.
; The insertelement in the 'if' block feeds a PHI, and only lane 1 of that PHI
; is read afterwards. The expected output contains no shufflevector; the one
; visible change is that the lane-1 extract of the load moves into 'if'.

define double @pr26354(<2 x double>* %tmp, i1 %B) {
; CHECK-LABEL: @pr26354(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* %tmp, align 16
; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
; CHECK-NEXT: br i1 %B, label %if, label %end
; CHECK: if:
; CHECK-NEXT: [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, double [[E2]], i32 3
; CHECK-NEXT: br label %end
; CHECK: end:
; CHECK-NEXT: [[PH:%.*]] = phi <4 x double> [ undef, %entry ], [ [[I1]], %if ]
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1
; CHECK-NEXT: [[MU:%.*]] = fmul double [[E1]], [[E3]]
; CHECK-NEXT: ret double [[MU]]
;

entry:
  %ld = load <2 x double>, <2 x double>* %tmp
  %e1 = extractelement <2 x double> %ld, i32 0
  %e2 = extractelement <2 x double> %ld, i32 1
  br i1 %B, label %if, label %end

if:
  %i1 = insertelement <4 x double> zeroinitializer, double %e2, i32 3
  br label %end

end:
  %ph = phi <4 x double> [ undef, %entry ], [ %i1, %if ]
  %e3 = extractelement <4 x double> %ph, i32 1
  %mu = fmul double %e1, %e3
  ret double %mu
}

; https://llvm.org/bugs/show_bug.cgi?id=30923
; Delete the widening shuffle if we're not going to reduce the extract/insert to a shuffle.
; The input already contains a widening shuffle (%widen) feeding %ext2, and
; %ext1 is defined in a different block from the inserts. The expected output
; has no shufflevector: %ext2 extracts directly from %x and both
; insertelements remain.

define <4 x float> @PR30923(<2 x float> %x) {
; CHECK-LABEL: @PR30923(
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[EXT1:%.*]] = extractelement <2 x float> %x, i32 1
; CHECK-NEXT: store float [[EXT1]], float* undef, align 4
; CHECK-NEXT: br label %bb2
; CHECK: bb2:
; CHECK-NEXT: [[EXT2:%.*]] = extractelement <2 x float> %x, i32 0
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, float [[EXT2]], i32 2
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[EXT1]], i32 3
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
bb1:
  %ext1 = extractelement <2 x float> %x, i32 1
  store float %ext1, float* undef, align 4
  br label %bb2

bb2:
  %widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %ext2 = extractelement <4 x float> %widen, i32 0
  %ins1 = insertelement <4 x float> <float 0.0, float 0.0, float undef, float undef>, float %ext2, i32 2
  %ins2 = insertelement <4 x float> %ins1, float %ext1, i32 3
  ret <4 x float> %ins2
}

; Don't insert extractelements from the wider vector before the def of the index operand.
; %d extracts from %x with a variable index (%c) that is defined after the
; insertelement that gets folded. In the expected output the rewritten extract
; from the widened vector appears after the add that defines the index.

define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: @extractelt_insertion(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
; CHECK-NEXT: [[C:%.*]] = add i32 [[Y:%.*]], 3
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]]
; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[RET:%.*]] = select i1 [[E]], <4 x i32> [[B]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[RET]]
;
entry:
  %a = extractelement <2 x i32> %x, i32 1
  %b = insertelement <4 x i32> zeroinitializer, i32 %a, i64 3
  %c = add i32 %y, 3
  %d = extractelement <2 x i32> %x, i32 %c
  %e = icmp eq i32 %d, 0
  %ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}