Home | History | Annotate | Download | only in InstCombine
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt -S -instcombine %s | FileCheck %s
      3 ; test1: one extract/insert pair into an undef <1 x i8> is expected to fold to a single shufflevector selecting lane 5.
      4 define <1 x i8> @test1(<8 x i8> %in) {
      5 ; CHECK-LABEL: @test1(
      6 ; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <8 x i8> %in, <8 x i8> undef, <1 x i32> <i32 5>
      7 ; CHECK-NEXT:    ret <1 x i8> [[VEC]]
      8 ;
      9   %val = extractelement <8 x i8> %in, i32 5 ; read lane 5 of the 8-lane input
     10   %vec = insertelement <1 x i8> undef, i8 %val, i32 0 ; place it in the only lane of the result
     11   ret <1 x i8> %vec
     12 }
     13 ; test2: four extract/insert pairs reading from two source vectors are expected to fold to one two-input shufflevector.
     14 define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
     15 ; CHECK-LABEL: @test2(
     16 ; CHECK-NEXT:    [[VEC_3:%.*]] = shufflevector <8 x i16> %in2, <8 x i16> %in, <4 x i32> <i32 11, i32 9, i32 0, i32 10>
     17 ; CHECK-NEXT:    ret <4 x i16> [[VEC_3]]
     18 ;
     19   %elt0 = extractelement <8 x i16> %in, i32 3
     20   %elt1 = extractelement <8 x i16> %in, i32 1
     21   %elt2 = extractelement <8 x i16> %in2, i32 0 ; the only lane read from %in2
     22   %elt3 = extractelement <8 x i16> %in, i32 2
     23 ; In the expected mask, indices 8-15 select from %in (second shuffle operand) and 0-7 from %in2 (first operand).
     24   %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0
     25   %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1
     26   %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2
     27   %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3
     28 
     29   ret <4 x i16> %vec.3
     30 }
     31 ; test_vcopyq_lane_p64: the single lane of %b replaces lane 1 of %a; expected output widens %b to <2 x i64> and then blends it with %a.
     32 define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) {
     33 ; CHECK-LABEL: @test_vcopyq_lane_p64(
     34 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
     35 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> %a, <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
     36 ; CHECK-NEXT:    ret <2 x i64> [[RES]]
     37 ;
     38   %elt = extractelement <1 x i64> %b, i32 0
     39   %res = insertelement <2 x i64> %a, i64 %elt, i32 1 ; overwrite lane 1 of %a, lane 0 is kept
     40   ret <2 x i64> %res
     41 }
     42 
     43 ; PR2109: https://llvm.org/bugs/show_bug.cgi?id=2109
     44 ; widen_extract2: both lanes of the narrower %ext go into lanes 1 and 3 of %ins; expected output is one widening shuffle of %ext plus one blending shuffle.
     45 define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) {
     46 ; CHECK-LABEL: @widen_extract2(
     47 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     48 ; CHECK-NEXT:    [[I2:%.*]] = shufflevector <4 x float> %ins, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 5>
     49 ; CHECK-NEXT:    ret <4 x float> [[I2]]
     50 ;
     51   %e1 = extractelement <2 x float> %ext, i32 0
     52   %e2 = extractelement <2 x float> %ext, i32 1
     53   %i1 = insertelement <4 x float> %ins, float %e1, i32 1
     54   %i2 = insertelement <4 x float> %i1, float %e2, i32 3
     55   ret <4 x float> %i2
     56 }
     57 ; widen_extract3: the three lanes of %ext are inserted in reverse order into lanes 2, 1, 0 of %ins; lane 3 of %ins is kept.
     58 define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) {
     59 ; CHECK-LABEL: @widen_extract3(
     60 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x float> %ext, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
     61 ; CHECK-NEXT:    [[I3:%.*]] = shufflevector <4 x float> %ins, <4 x float> [[TMP1]], <4 x i32> <i32 6, i32 5, i32 4, i32 3>
     62 ; CHECK-NEXT:    ret <4 x float> [[I3]]
     63 ;
     64   %e1 = extractelement <3 x float> %ext, i32 0
     65   %e2 = extractelement <3 x float> %ext, i32 1
     66   %e3 = extractelement <3 x float> %ext, i32 2
     67   %i1 = insertelement <4 x float> %ins, float %e1, i32 2
     68   %i2 = insertelement <4 x float> %i1, float %e2, i32 1
     69   %i3 = insertelement <4 x float> %i2, float %e3, i32 0
     70   ret <4 x float> %i3
     71 }
     72 ; widen_extract4: a single lane of a 2-lane vector goes into lane 2 of an 8-lane vector; the expected widening shuffle defines only lane 0.
     73 define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
     74 ; CHECK-LABEL: @widen_extract4(
     75 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     76 ; CHECK-NEXT:    [[I1:%.*]] = shufflevector <8 x float> %ins, <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
     77 ; CHECK-NEXT:    ret <8 x float> [[I1]]
     78 ;
     79   %e1 = extractelement <2 x float> %ext, i32 0
     80   %i1 = insertelement <8 x float> %ins, float %e1, i32 2
     81   ret <8 x float> %i1
     82 }
     83 
     84 ; PR26015: https://llvm.org/bugs/show_bug.cgi?id=26015
     85 ; The widening shuffle must be inserted before any uses.
     86 ; Lanes 2 and 3 of %t0 go into lanes 3 and 7 of a zero vector; expected output is one widening shuffle followed by one blending shuffle.
     87 define <8 x i16> @pr26015(<4 x i16> %t0) {
     88 ; CHECK-LABEL: @pr26015(
     89 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
     90 ; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
     91 ; CHECK-NEXT:    ret <8 x i16> [[T5]]
     92 ;
     93   %t1 = extractelement <4 x i16> %t0, i32 2
     94   %t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
     95   %t3 = insertelement <8 x i16> %t2, i16 0, i32 6 ; stores 0 into a lane that is already 0
     96   %t4 = extractelement <4 x i16> %t0, i32 3
     97   %t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
     98   ret <8 x i16> %t5
     99 }
    100 
    101 ; PR25999: https://llvm.org/bugs/show_bug.cgi?id=25999
    102 ; TODO: The widening shuffle could be inserted at the start of the function to allow the first extract to use it.
    103 ; Expected output: the widening shuffle is created in 'if', %t1 (defined in the entry block) is still inserted as a scalar, and only the %t4 pair becomes a shuffle.
    104 define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) {
    105 ; CHECK-LABEL: @pr25999(
    106 ; CHECK-NEXT:    [[T1:%.*]] = extractelement <4 x i16> %t0, i32 2
    107 ; CHECK-NEXT:    br i1 %b, label %if, label %end
    108 ; CHECK:       if:
    109 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    110 ; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 [[T1]], i32 3
    111 ; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
    112 ; CHECK-NEXT:    ret <8 x i16> [[T5]]
    113 ; CHECK:       end:
    114 ; CHECK-NEXT:    [[A1:%.*]] = add i16 [[T1]], 4
    115 ; CHECK-NEXT:    [[T6:%.*]] = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 [[A1]], i32 0
    116 ; CHECK-NEXT:    ret <8 x i16> [[T6]]
    117 ;
    118 
    119   %t1 = extractelement <4 x i16> %t0, i32 2 ; used in both the 'if' and the 'end' block
    120   br i1 %b, label %if, label %end
    121 
    122 if:
    123   %t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
    124   %t3 = insertelement <8 x i16> %t2, i16 0, i32 6 ; stores 0 into a lane that is already 0
    125   %t4 = extractelement <4 x i16> %t0, i32 3
    126   %t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
    127   ret <8 x i16> %t5
    128 
    129 end:
    130   %a1 = add i16 %t1, 4
    131   %t6 = insertelement <8 x i16> zeroinitializer, i16 %a1, i32 0
    132   ret <8 x i16> %t6
    133 }
    134 
    135 ; The widening shuffle must be inserted at a valid point (after the PHIs).
    136 ; Here the extracted vector %tmp1 is itself a PHI, so the widening shuffle is expected directly after the PHI group in bb3.
    137 define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) {
    138 ; CHECK-LABEL: @pr25999_phis1(
    139 ; CHECK-NEXT:  bb1:
    140 ; CHECK-NEXT:    br i1 %c, label %bb2, label %bb3
    141 ; CHECK:       bb2:
    142 ; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @dummy(<2 x double> %a)
    143 ; CHECK-NEXT:    br label %bb3
    144 ; CHECK:       bb3:
    145 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x double> [ %a, %bb1 ], [ [[R]], %bb2 ]
    146 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
    147 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
    148 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
    149 ; CHECK-NEXT:    ret <4 x double> [[TMP4]]
    150 ;
    151 bb1:
    152   br i1 %c, label %bb2, label %bb3
    153 
    154 bb2:
    155   %r = call <2 x double> @dummy(<2 x double> %a)
    156   br label %bb3
    157 
    158 bb3:
    159   %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
    160   %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
    161   %tmp3 = extractelement <2 x double> %tmp1, i32 0 ; source vector is the PHI above
    162   %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
    163   ret <4 x double> %tmp4
    164 }
    165 ; External function (declaration only) whose result is the bb2 incoming value of the %tmp1 PHI in the pr25999_phis tests.
    166 declare <2 x double> @dummy(<2 x double>)
    167 ; Like pr25999_phis1, but the extracted vector is an fadd of the PHI; the widening shuffle is expected directly after the fadd.
    168 define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) {
    169 ; CHECK-LABEL: @pr25999_phis2(
    170 ; CHECK-NEXT:  bb1:
    171 ; CHECK-NEXT:    br i1 %c, label %bb2, label %bb3
    172 ; CHECK:       bb2:
    173 ; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @dummy(<2 x double> %a)
    174 ; CHECK-NEXT:    br label %bb3
    175 ; CHECK:       bb3:
    176 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x double> [ %a, %bb1 ], [ [[R]], %bb2 ]
    177 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
    178 ; CHECK-NEXT:    [[D:%.*]] = fadd <2 x double> [[TMP1]], [[TMP1]]
    179 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
    180 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
    181 ; CHECK-NEXT:    ret <4 x double> [[TMP4]]
    182 ;
    183 bb1:
    184   br i1 %c, label %bb2, label %bb3
    185 
    186 bb2:
    187   %r = call <2 x double> @dummy(<2 x double> %a)
    188   br label %bb3
    189 
    190 bb3:
    191   %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
    192   %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
    193   %d = fadd <2 x double> %tmp1, %tmp1 ; non-PHI definition of the vector that is extracted from
    194   %tmp3 = extractelement <2 x double> %d, i32 0
    195   %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
    196   ret <4 x double> %tmp4
    197 }
    198 
    199 ; PR26354: https://llvm.org/bugs/show_bug.cgi?id=26354
    200 ; Don't create a shufflevector if we know that we're not going to replace the insertelement.
    201 ; Expected output contains no shufflevector: %e2 and %i1 stay scalar extract/insert operations, with the %e2 extract placed in 'if'.
    202 define double @pr26354(<2 x double>* %tmp, i1 %B) {
    203 ; CHECK-LABEL: @pr26354(
    204 ; CHECK-NEXT:  entry:
    205 ; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, <2 x double>* %tmp, align 16
    206 ; CHECK-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
    207 ; CHECK-NEXT:    br i1 %B, label %if, label %end
    208 ; CHECK:       if:
    209 ; CHECK-NEXT:    [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1
    210 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, double [[E2]], i32 3
    211 ; CHECK-NEXT:    br label %end
    212 ; CHECK:       end:
    213 ; CHECK-NEXT:    [[PH:%.*]] = phi <4 x double> [ undef, %entry ], [ [[I1]], %if ]
    214 ; CHECK-NEXT:    [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1
    215 ; CHECK-NEXT:    [[MU:%.*]] = fmul double [[E1]], [[E3]]
    216 ; CHECK-NEXT:    ret double [[MU]]
    217 ;
    218 
    219 entry:
    220   %ld = load <2 x double>, <2 x double>* %tmp
    221   %e1 = extractelement <2 x double> %ld, i32 0
    222   %e2 = extractelement <2 x double> %ld, i32 1 ; only used by the insertelement in 'if'
    223   br i1 %B, label %if, label %end
    224 
    225 if:
    226   %i1 = insertelement <4 x double> zeroinitializer, double %e2, i32 3
    227   br label %end
    228 
    229 end:
    230   %ph = phi <4 x double> [ undef, %entry ], [ %i1, %if ]
    231   %e3 = extractelement <4 x double> %ph, i32 1 ; lane 1 is undef coming from 'entry' and 0.0 coming from 'if'
    232   %mu = fmul double %e1, %e3
    233   ret double %mu
    234 }
    235 
    236 ; PR30923: https://llvm.org/bugs/show_bug.cgi?id=30923
    237 ; Delete the widening shuffle if we're not going to reduce the extract/insert to a shuffle.
    238 ; The input already contains a widening shuffle (%widen); expected output extracts lane 0 directly from %x and has no shufflevector.
    239 define <4 x float> @PR30923(<2 x float> %x) {
    240 ; CHECK-LABEL: @PR30923(
    241 ; CHECK-NEXT:  bb1:
    242 ; CHECK-NEXT:    [[EXT1:%.*]] = extractelement <2 x float> %x, i32 1
    243 ; CHECK-NEXT:    store float [[EXT1]], float* undef, align 4
    244 ; CHECK-NEXT:    br label %bb2
    245 ; CHECK:       bb2:
    246 ; CHECK-NEXT:    [[EXT2:%.*]] = extractelement <2 x float> %x, i32 0
    247 ; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, float [[EXT2]], i32 2
    248 ; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[EXT1]], i32 3
    249 ; CHECK-NEXT:    ret <4 x float> [[INS2]]
    250 ;
    251 bb1:
    252   %ext1 = extractelement <2 x float> %x, i32 1
    253   store float %ext1, float* undef, align 4 ; second use of %ext1, besides the insert in bb2
    254   br label %bb2
    255 
    256 bb2:
    257   %widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; pre-existing widening shuffle, absent from the expected output
    258   %ext2 = extractelement <4 x float> %widen, i32 0
    259   %ins1 = insertelement <4 x float> <float 0.0, float 0.0, float undef, float undef>, float %ext2, i32 2
    260   %ins2 = insertelement <4 x float> %ins1, float %ext1, i32 3
    261   ret <4 x float> %ins2
    262 }
    263 
    264 ; Don't insert extractelements from the wider vector before the def of the index operand.
    265 ; %d uses the variable index %c; in the expected output it reads from the widened vector and stays after the add that defines %c.
    266 define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) {
    267 ; CHECK-LABEL: @extractelt_insertion(
    268 ; CHECK-NEXT:  entry:
    269 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    270 ; CHECK-NEXT:    [[B:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
    271 ; CHECK-NEXT:    [[C:%.*]] = add i32 [[Y:%.*]], 3
    272 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]]
    273 ; CHECK-NEXT:    [[E:%.*]] = icmp eq i32 [[TMP1]], 0
    274 ; CHECK-NEXT:    [[RET:%.*]] = select i1 [[E]], <4 x i32> [[B]], <4 x i32> zeroinitializer
    275 ; CHECK-NEXT:    ret <4 x i32> [[RET]]
    276 ;
    277 entry:
    278   %a = extractelement <2 x i32> %x, i32 1
    279   %b = insertelement <4 x i32> zeroinitializer, i32 %a, i64 3
    280   %c = add i32 %y, 3
    281   %d = extractelement <2 x i32> %x, i32 %c ; variable index, defined by the add above
    282   %e = icmp eq i32 %d, 0
    283   %ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
    284   ret <4 x i32> %ret
    285 }
    286