Home | History | Annotate | Download | only in InstCombine
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt < %s -instcombine -S | FileCheck %s
      3 
      4 ;
      5 ; EXTRQ
      6 ;
      7 
      8 define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
      9 ; CHECK-LABEL: @test_extrq_call(
     10 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
     11 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
     12 ;
        ; Baseline: both operands are opaque function arguments, so the extrq
        ; call is expected to pass through instcombine unchanged.
     13   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
     14   ret <2 x i64> %1
     15 }
     16 
     17 define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
     18 ; CHECK-LABEL: @test_extrq_zero_arg0(
     19 ; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
     20 ;
        ; An all-zero source operand: whatever field %y selects, the expected
        ; result is a zero low element with an undef upper element.
     21   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
     22   ret <2 x i64> %1
     23 }
     24 
     25 define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
     26 ; CHECK-LABEL: @test_extrq_zero_arg1(
     27 ; CHECK-NEXT:    ret <2 x i64> %x
     28 ;
        ; An all-zero control vector: the call is expected to fold to its source
        ; operand %x (presumably length 0 / index 0 selects the whole low
        ; quadword -- TODO confirm against the SSE4A specification).
     29   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
     30   ret <2 x i64> %1
     31 }
     32 
     33 define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
     34 ; CHECK-LABEL: @test_extrq_to_extqi(
     35 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
     36 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
     37 ;
        ; Constant control bytes <8, 15, 0...> with a variable source: the register
        ; form is expected to be rewritten to the immediate form extrqi(%x, 8, 15).
        ; NOTE(review): "extqi" in the test name looks like a typo for "extrqi";
        ; it is left as-is because the label match above depends on it.
     38   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
     39   ret <2 x i64> %1
     40 }
     41 
     42 define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
     43 ; CHECK-LABEL: @test_extrq_constant(
     44 ; CHECK-NEXT:    ret <2 x i64> <i64 255, i64 undef>
     45 ;
        ; Fully constant operands: an 8-bit field taken at bit 15 of an all-ones
        ; low element folds to 255. The upper source element (55) does not
        ; contribute, and the upper result element is undef.
     46   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
     47   ret <2 x i64> %1
     48 }
     49 
     50 define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
     51 ; CHECK-LABEL: @test_extrq_constant_undef(
     52 ; CHECK-NEXT:    ret <2 x i64> <i64 65535, i64 undef>
     53 ;
        ; Constant fold with an undef upper source element: a 16-bit field at
        ; bit 15 of an all-ones low element folds to 65535, so the undef lane
        ; must not block the fold.
     54   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
     55   ret <2 x i64> %1
     56 }
     57 
     58 ;
     59 ; EXTRQI
     60 ;
     61 
     62 define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
     63 ; CHECK-LABEL: @test_extrqi_call(
     64 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
     65 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
     66 ;
        ; Baseline: variable source with immediates (8, 23); the call is expected
        ; to be kept as-is (presumably because index 23 is not byte-aligned, so
        ; no byte-shuffle form exists -- TODO confirm).
     67   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
     68   ret <2 x i64> %1
     69 }
     70 
     71 define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
     72 ; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
     73 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
     74 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     75 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
     76 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
     77 ;
        ; Byte-aligned immediates (length 32, index 32) are expected to become a
        ; byte shuffle: bytes 4-7 of %x, then four zero bytes, upper half undef.
     78   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
     79   ret <2 x i64> %1
     80 }
     81 
     82 define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
     83 ; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
     84 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
     85 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     86 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
     87 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
     88 ;
        ; Byte-aligned immediates (length 8, index 16) are expected to become a
        ; byte shuffle: byte 2 of %x, then seven zero bytes, upper half undef.
     89   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
     90   ret <2 x i64> %1
     91 }
     92 
     93 define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
     94 ; CHECK-LABEL: @test_extrqi_undef(
     95 ; CHECK-NEXT:    ret <2 x i64> undef
     96 ;
        ; Length 32 plus index 33 runs past bit 63 of the low quadword; the
        ; expected fold is a fully undef result.
     97   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
     98   ret <2 x i64> %1
     99 }
    100 
    101 define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
    102 ; CHECK-LABEL: @test_extrqi_zero(
    103 ; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
    104 ;
        ; All-zero source with in-range immediates (3, 18): the extracted field
        ; folds to 0 and the upper result element is undef.
    105   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
    106   ret <2 x i64> %1
    107 }
    108 
    109 define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
    110 ; CHECK-LABEL: @test_extrqi_constant(
    111 ; CHECK-NEXT:    ret <2 x i64> <i64 7, i64 undef>
    112 ;
        ; Constant source: a 3-bit field at bit 18 of an all-ones low element
        ; folds to 7; the upper source element (55) does not contribute.
    113   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
    114   ret <2 x i64> %1
    115 }
    116 
    117 define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
    118 ; CHECK-LABEL: @test_extrqi_constant_undef(
    119 ; CHECK-NEXT:    ret <2 x i64> <i64 15, i64 undef>
    120 ;
        ; Constant fold with an undef upper source element: a 4-bit field at
        ; bit 18 of an all-ones low element folds to 15.
    121   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
    122   ret <2 x i64> %1
    123 }
    124 
    125 ;
    126 ; INSERTQ
    127 ;
    128 
    129 define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
    130 ; CHECK-LABEL: @test_insertq_call(
    131 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
    132 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    133 ;
        ; Baseline: both operands are opaque function arguments, so the insertq
        ; call is expected to pass through instcombine unchanged.
    134   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
    135   ret <2 x i64> %1
    136 }
    137 
    138 define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
    139 ; CHECK-LABEL: @test_insertq_to_insertqi(
    140 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
    141 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    142 ;
        ; Constant second operand: its upper element 658 (0x292) carries the
        ; control fields -- 0x12 = 18 in the low byte and 2 in the next byte --
        ; so the call is expected to become insertqi(..., 18, 2), after which
        ; that upper element is no longer needed and turns into undef.
    143   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
    144   ret <2 x i64> %1
    145 }
    146 
    147 define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
    148 ; CHECK-LABEL: @test_insertq_constant(
    149 ; CHECK-NEXT:    ret <2 x i64> <i64 32, i64 undef>
    150 ;
        ; Fully constant operands: inserting the low 18 bits of 8 at bit 2 of a
        ; zero destination folds to 32 (8 << 2); upper result element is undef.
    151   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
    152   ret <2 x i64> %1
    153 }
    154 
    155 define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
    156 ; CHECK-LABEL: @test_insertq_constant_undef(
    157 ; CHECK-NEXT:    ret <2 x i64> <i64 33, i64 undef>
    158 ;
        ; Constant fold with an undef upper destination element: the same insert
        ; as the all-constant case, but into a low element of 1, giving
        ; 33 (1 | (8 << 2)).
    159   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
    160   ret <2 x i64> %1
    161 }
    162 
    163 ;
    164 ; INSERTQI
    165 ;
    166 
    167 define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
    168 ; CHECK-LABEL: @test_insertqi_shuffle_04uu(
    169 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    170 ; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
    171 ;
        ; Byte-aligned insert (length 32, index 32): expected to become a single
        ; byte shuffle taking bytes 0-3 from %v and bytes 0-3 of %i, with the
        ; surrounding bitcasts folded away and the upper half undef.
    172   %1 = bitcast <16 x i8> %v to <2 x i64>
    173   %2 = bitcast <16 x i8> %i to <2 x i64>
    174   %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
    175   %4 = bitcast <2 x i64> %3 to <16 x i8>
    176   ret <16 x i8> %4
    177 }
    178 
    179 define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
    180 ; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
    181 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    182 ; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
    183 ;
        ; Byte-aligned insert (length 16, index 0): expected to become a single
        ; byte shuffle whose first two bytes come from %i (mask indices 16, 17)
        ; while bytes 2-7 stay from %v; upper half undef.
    184   %1 = bitcast <16 x i8> %v to <2 x i64>
    185   %2 = bitcast <16 x i8> %i to <2 x i64>
    186   %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
    187   %4 = bitcast <2 x i64> %3 to <16 x i8>
    188   ret <16 x i8> %4
    189 }
    190 
    191 define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
    192 ; CHECK-LABEL: @test_insertqi_constant(
    193 ; CHECK-NEXT:    ret <2 x i64> <i64 -131055, i64 undef>
    194 ;
        ; Fully constant operands: inserting the low 16 bits of 8 at bit 1 of an
        ; all-ones destination clears bits 1-16 and sets bit 4, giving
        ; 0xFFFFFFFFFFFE0011 (-131055); upper result element is undef.
    195   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
    196   ret <2 x i64> %1
    197 }
    198 
    199 ; The result of this insert is the second arg, since the top 64 bits of
    200 ; the result are undefined, and we copy the bottom 64 bits from the
    201 ; second arg
    202 define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
    203 ; CHECK-LABEL: @testInsert64Bits(
    204 ; CHECK-NEXT:    ret <2 x i64> %i
    205 ;
        ; Length 64 at index 0 overwrites the entire low quadword of %v, so the
        ; call is expected to fold to %i.
    206   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
    207   ret <2 x i64> %1
    208 }
    209 
    210 define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
    211 ; CHECK-LABEL: @testZeroLength(
    212 ; CHECK-NEXT:    ret <2 x i64> %i
    213 ;
        ; Length 0 at index 0 is expected to fold to %i, the same result as the
        ; explicit 64-bit insert in testInsert64Bits (i.e. a zero length behaves
        ; like a full-width insert here).
    214   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
    215   ret <2 x i64> %1
    216 }
    217 
    218 define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
    219 ; CHECK-LABEL: @testUndefinedInsertq_1(
    220 ; CHECK-NEXT:    ret <2 x i64> undef
    221 ;
        ; Length 0 with a non-zero index (16): the expected fold is undef
        ; (presumably because a zero length means 64 bits, which overruns the
        ; low quadword once the index is non-zero -- TODO confirm).
    222   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
    223   ret <2 x i64> %1
    224 }
    225 
    226 define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
    227 ; CHECK-LABEL: @testUndefinedInsertq_2(
    228 ; CHECK-NEXT:    ret <2 x i64> undef
    229 ;
        ; Length 48 plus index 32 runs past bit 63 of the low quadword; the
        ; expected fold is a fully undef result.
    230   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
    231   ret <2 x i64> %1
    232 }
    233 
    234 define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
    235 ; CHECK-LABEL: @testUndefinedInsertq_3(
    236 ; CHECK-NEXT:    ret <2 x i64> undef
    237 ;
        ; Length 64 plus index 16 runs past bit 63 of the low quadword; the
        ; expected fold is a fully undef result.
    238   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
    239   ret <2 x i64> %1
    240 }
    241 
    242 ;
    243 ; Vector Demanded Elements
    244 ;
    245 
    246 define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
    247 ; CHECK-LABEL: @test_extrq_arg0(
    248 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
    249 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    250 ;
        ; The splat shuffle only changes the upper element of the source operand.
        ; It is expected to be removed so that the call takes %x directly, i.e.
        ; extrq does not demand that element.
    251   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    252   %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
    253   ret <2 x i64> %2
    254 }
    255 
    256 define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
    257 ; CHECK-LABEL: @test_extrq_arg1(
    258 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
    259 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    260 ;
        ; The shuffle keeps bytes 0-7 of the control vector and only rewrites
        ; bytes 8-15. It is expected to be removed so that the call takes %y
        ; directly, i.e. extrq does not demand the upper eight control bytes.
    261   %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    262   %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
    263   ret <2 x i64> %2
    264 }
    265 
    266 define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
    267 ; CHECK-LABEL: @test_extrq_args01(
    268 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
    269 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    270 ;
        ; Both operands go through shuffles that only alter their upper halves;
        ; both shuffles are expected to be removed in the same run.
    271   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    272   %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    273   %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
    274   ret <2 x i64> %3
    275 }
    276 
    277 define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
    278 ; CHECK-LABEL: @test_extrq_ret(
    279 ; CHECK-NEXT:    ret <2 x i64> undef
    280 ;
        ; Only the upper element of the extrq result is used (splatted by the
        ; shuffle); the whole function is expected to fold to undef and the
        ; call to disappear.
    281   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
    282   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    283   ret <2 x i64> %2
    284 }
    285 
    286 define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
    287 ; CHECK-LABEL: @test_extrqi_arg0(
    288 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
    289 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    290 ;
        ; The splat shuffle only changes the upper element of the source operand.
        ; It is expected to be removed so that the call takes %x directly, i.e.
        ; extrqi does not demand that element.
    291   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    292   %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
    293   ret <2 x i64> %2
    294 }
    295 
    296 define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
    297 ; CHECK-LABEL: @test_extrqi_ret(
    298 ; CHECK-NEXT:    ret <2 x i64> undef
    299 ;
        ; Only the upper element of the extrqi result is used (splatted by the
        ; shuffle); the whole function is expected to fold to undef and the
        ; call to disappear.
    300   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
    301   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    302   ret <2 x i64> %2
    303 }
    304 
    305 define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
    306 ; CHECK-LABEL: @test_insertq_arg0(
    307 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
    308 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    309 ;
        ; The splat shuffle only changes the upper element of the destination
        ; operand. It is expected to be removed so that the call takes %x
        ; directly, i.e. insertq does not demand that element of operand 0.
    310   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    311   %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
    312   ret <2 x i64> %2
    313 }
    314 
    315 define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
    316 ; CHECK-LABEL: @test_insertq_ret(
    317 ; CHECK-NEXT:    ret <2 x i64> undef
    318 ;
        ; Only the upper element of the insertq result is used (splatted by the
        ; shuffle); the whole function is expected to fold to undef and the
        ; call to disappear.
    319   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
    320   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    321   ret <2 x i64> %2
    322 }
    323 
    324 define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
    325 ; CHECK-LABEL: @test_insertqi_arg0(
    326 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
    327 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    328 ;
        ; The splat shuffle only changes the upper element of the destination
        ; operand. It is expected to be removed so that the call takes %x
        ; directly, i.e. insertqi does not demand that element of operand 0.
    329   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    330   %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
    331   ret <2 x i64> %2
    332 }
    333 
    334 define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
    335 ; CHECK-LABEL: @test_insertqi_arg1(
    336 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
    337 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    338 ;
        ; The splat shuffle only changes the upper element of the inserted
        ; operand. It is expected to be removed so that the call takes %y
        ; directly, i.e. insertqi does not demand that element of operand 1.
    339   %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    340   %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
    341   ret <2 x i64> %2
    342 }
    343 
    344 define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
    345 ; CHECK-LABEL: @test_insertqi_args01(
    346 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
    347 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    348 ;
        ; Both vector operands go through splat shuffles that only alter their
        ; upper elements; both shuffles are expected to be removed in the same
        ; run.
    349   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    350   %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    351   %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
    352   ret <2 x i64> %3
    353 }
    354 
    355 define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
    356 ; CHECK-LABEL: @test_insertqi_ret(
    357 ; CHECK-NEXT:    ret <2 x i64> undef
    358 ;
        ; Only the upper element of the insertqi result is used (splatted by the
        ; shuffle); the whole function is expected to fold to undef and the
        ; call to disappear.
    359   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
    360   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    361   ret <2 x i64> %2
    362 }
    363 
    364 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
    365 declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
    366 
    367 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
    368 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
    369 
    370 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
    371 declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
    372 
    373 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
    374 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
    375