; Check AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx                             -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2                 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl                                    -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl                  -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512bw                  -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx                                    -o /dev/null
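;
; Note: these RUN lines are compile-only. Output is discarded via -o /dev/null
; and no FileCheck prefixes are used, so each invocation only verifies that
; instruction selection and encoding (-show-mc-encoding) complete without
; crashing for the given CPU/feature combination.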

define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

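; vpandn computes (NOT src1) AND src2, so in the tests below the xor-with-ones
; result feeds the negated operand while the other input deliberately stays %a.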
define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}

define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

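; The icmp+sext pairs below match the vpcmpgt*/vpcmpeq* patterns, which
; produce all-ones or all-zeros lanes directly.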
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

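; The shuffle masks below select a contiguous element window from the
; concatenation of the two inputs (per 128-bit lane in the 256-bit cases),
; which is the pattern the vpalignr-named tests target.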
define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}

define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}

define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}

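; Element extract/insert tests. With AVX512BW the byte and word forms of
; vpextr*/vpinsr* also gain EVEX encodings.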
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y, i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}

define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}

define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
; vmovshdup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
; vmovshdup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
; vmovsldup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; vmovsldup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}

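; A scalar load merged into one half of a vector, as below, is expected to
; match the vmovlpd/vmovhpd memory forms.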
define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define void @store_floats(<4 x float> %x, i64* %p) {
  %a = fadd <4 x float> %x, %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = bitcast <2 x float> %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 0
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_h_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 1
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define <2 x double> @test39(double* %ptr) nounwind {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

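; Vector shift tests. Plain AVX2 has no 64-bit arithmetic shift instruction,
; while AVX512VL adds vpsraq/vpsravq (and AVX512BW+VL adds the variable word
; shifts vpsllvw/vpsrlvw/vpsravw), so these lower differently per feature set.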
define <8 x i32> @ashr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = ashr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = lshr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @ashr_const_v8i32(<8 x i32> %a) {
  %shift = ashr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_const_v8i32(<8 x i32> %a) {
  %shift = lshr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @shl_const_v8i32(<8 x i32> %a) {
  %shift = shl <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <4 x i64> @ashr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = ashr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = lshr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @shl_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @ashr_const_v4i64(<4 x i64> %a) {
  %shift = ashr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_const_v4i64(<4 x i64> %a) {
  %shift = lshr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @shl_const_v4i64(<4 x i64> %a) {
  %shift = shl <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <16 x i16> @ashr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = ashr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = lshr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @shl_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @ashr_const_v16i16(<16 x i16> %a) {
  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_const_v16i16(<16 x i16> %a) {
  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <4 x i32> @ashr_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %shift = ashr <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {
  %shift = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shift
}

define <2 x i64> @ashr_v2i64(<2 x i64> %a, <2 x i64> %b) {
  %shift = ashr <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <2 x i64> @shl_const_v2i64(<2 x i64> %a) {
  %shift = shl <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %shift
}

define <8 x i16> @ashr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = ashr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = lshr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @shl_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = shl <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @ashr_const_v8i16(<8 x i16> %a) {
  %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_const_v8i16(<8 x i16> %a) {
  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {
  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

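; Zero-extending the low half of the input matches the vpmovzxbw pattern.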
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}

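; Splat via insertelement at lane 0 plus an all-zeros shuffle mask is the
; canonical broadcast pattern, matching vpbroadcast*/vbroadcastss/vbroadcastsd.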
define <32 x i8> @_broadcast32xi8(i8 %a) {
  %b = insertelement <32 x i8> undef, i8 %a, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %c
}

define <16 x i8> @_broadcast16xi8(i8 %a) {
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %c
}

define <16 x i16> @_broadcast16xi16(i16 %a) {
  %b = insertelement <16 x i16> undef, i16 %a, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %c
}

define <8 x i16> @_broadcast8xi16(i16 %a) {
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %c
}

define <8 x i32> @_broadcast8xi32(i32 %a) {
  %b = insertelement <8 x i32> undef, i32 %a, i32 0
  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %c
}

define <4 x i32> @_broadcast4xi32(i32 %a) {
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %c
}

define <4 x i64> @_broadcast4xi64(i64 %a) {
  %b = insertelement <4 x i64> undef, i64 %a, i64 0
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %c
}

define <2 x i64> @_broadcast2xi64(i64 %a) {
  %b = insertelement <2 x i64> undef, i64 %a, i64 0
  %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %c
}

define <8 x float> @_broadcast8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x float> @_broadcast4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <4 x double> @_broadcast4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}

define <2 x double> @_broadcast2xdouble(double %a) {
  %b = insertelement <2 x double> undef, double %a, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %c
}

define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <32 x i8> @test_cmpgtb(<32 x i8> %A) {
; This should generate the following code:
;   vpxor    %ymm1, %ymm1, %ymm1
;   vpcmpgtb %ymm0, %ymm1, %ymm0
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}

define <4 x float> @_inreg4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <8 x float> @_inreg8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x double> @_inreg4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}