; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c

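; NOTE: Each test below mirrors one SSE2 _mm_* intrinsic. The SSE/AVX1/AVX512
; check prefixes come from the RUN lines above, and -show-mc-encoding lets the
; checks pin down the exact instruction encodings, including the EVEX-to-VEX
; compression that the AVX512 runs are expected to perform.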
define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = add <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = add <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = add <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = add <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

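; The packed and scalar FP adds map directly to (v)addpd/(v)addsd. Note the
; _mm_add_sd pattern below adds only element 0 and reinserts it into %a0,
; which still folds to a single scalar addsd.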
define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fadd = fadd double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fadd, i32 0
  ret <2 x double> %res
}

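; The saturating adds are expressed via the target intrinsics
; (llvm.x86.sse2.padds.b/w and llvm.x86.sse2.paddus.b/w) rather than generic
; IR, and select to (v)padds*/(v)paddus* on every run.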
define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

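; Bitwise ops: both the pd and si128 AND variants select to the FP-domain
; andps form here. Note the asymmetry in _mm_andnot_si128: SSE/AVX1
; materialize all-ones with pcmpeq and invert with pxor, while AVX512 folds
; the NOT into a single vpternlogq $15.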
define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_and_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_and_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = and <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; SSE-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; SSE-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
  %res = and <2 x i64> %not, %a1
  ret <2 x i64> %res
}

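; _mm_avg_epu8/epu16 are written out as zext -> add -> add 1 -> lshr 1 ->
; trunc. SSE/AVX1 re-match this pattern to pavgb/pavgw, but the AVX512 run
; does not, emitting the widened 256-bit sequence followed by a truncating
; vpmovwb/vpmovdw (hence the vzeroupper).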
define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
; AVX512-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT:    vpmovzxbw %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc9]
; AVX512-NEXT:    # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; AVX512-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
; AVX512-NEXT:    vpsrlw $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x01]
; AVX512-NEXT:    vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %zext0 = zext <16 x i8> %arg0 to <16 x i16>
  %zext1 = zext <16 x i8> %arg1 to <16 x i16>
  %add = add <16 x i16> %zext0, %zext1
  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = trunc <16 x i16> %lshr to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc0]
; AVX512-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    vpmovzxwd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc9]
; AVX512-NEXT:    # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
; AVX512-NEXT:    vpsrld $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x01]
; AVX512-NEXT:    vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %zext0 = zext <8 x i16> %arg0 to <8 x i32>
  %zext1 = zext <8 x i16> %arg1 to <8 x i32>
  %add = add <8 x i32> %zext0, %zext1
  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = trunc <8 x i32> %lshr to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

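; The byte shifts are modeled as shufflevectors against a zero vector and
; select to pslldq/psrldq, with the expected shuffle decomposition shown in
; the assembly comments.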
define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bslli_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
; SSE-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bslli_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX1-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bslli_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX512-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bsrli_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
; SSE-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bsrli_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX1-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bsrli_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX512-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

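; The _mm_cast* helpers are pure bitcasts, so every run checks an immediate
; ret with no data movement at all.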
define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_si128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x double>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_si128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <4 x float>
  ret <4 x float> %res
}

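; _mm_clflush calls llvm.x86.sse2.clflush directly and emits a single
; clflush; the only per-target difference is i386 reloading the pointer
; argument from the stack.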
define void @test_mm_clflush(i8* %a0) nounwind {
; X86-LABEL: test_mm_clflush:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    clflush (%eax) # encoding: [0x0f,0xae,0x38]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_clflush:
; X64:       # %bb.0:
; X64-NEXT:    clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.sse2.clflush(i8* %a0)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone

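; Integer equality compares: SSE/AVX1 produce the all-ones/all-zeros result
; directly in a vector register via pcmpeq*, while AVX512 compares into a
; mask register and expands it back with vpmovm2b/w/d.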
define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp eq <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp eq <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp eq <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp oeq <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

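; The SSE2 cmppd/cmpsd predicate set has no GE/GT immediates, so those tests
; expect the operands to be swapped and the LE/LT predicates reused. The
; scalar forms must also preserve the upper element of %a0, hence the
; movsd/blendpd after the compare.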
define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}

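; There is no pcmplt*, so the integer less-than tests compile as pcmpgt* with
; the operands swapped, plus a movdqa on SSE to move the result back to xmm0.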
define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}

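; The negated compares use the unordered IR predicates (une/ugt/uge) and map
; to the cmpneq/cmpnle/cmpnlt encodings; the nge/ngt variants again swap
; operands to reuse nle/nlt, while cmpord/cmpunord use fcmp ord/uno directly.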
    920 define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
    921 ; SSE-LABEL: test_mm_cmpneq_pd:
    922 ; SSE:       # %bb.0:
    923 ; SSE-NEXT:    cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
    924 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    925 ;
    926 ; AVX1-LABEL: test_mm_cmpneq_pd:
    927 ; AVX1:       # %bb.0:
    928 ; AVX1-NEXT:    vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
    929 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    930 ;
    931 ; AVX512-LABEL: test_mm_cmpneq_pd:
    932 ; AVX512:       # %bb.0:
    933 ; AVX512-NEXT:    vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
    934 ; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
    935 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    936   %fcmp = fcmp une <2 x double> %a0, %a1
    937   %sext = sext <2 x i1> %fcmp to <2 x i64>
    938   %res = bitcast <2 x i64> %sext to <2 x double>
    939   ret <2 x double> %res
    940 }
    941 
    942 define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
    943 ; SSE-LABEL: test_mm_cmpneq_sd:
    944 ; SSE:       # %bb.0:
    945 ; SSE-NEXT:    cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
    946 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    947 ;
    948 ; AVX-LABEL: test_mm_cmpneq_sd:
    949 ; AVX:       # %bb.0:
    950 ; AVX-NEXT:    vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
    951 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    952   %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
    953   ret <2 x double> %res
    954 }
    955 
    956 define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
    957 ; SSE-LABEL: test_mm_cmpnge_pd:
    958 ; SSE:       # %bb.0:
    959 ; SSE-NEXT:    cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
    960 ; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
    961 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    962 ;
    963 ; AVX1-LABEL: test_mm_cmpnge_pd:
    964 ; AVX1:       # %bb.0:
    965 ; AVX1-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
    966 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    967 ;
    968 ; AVX512-LABEL: test_mm_cmpnge_pd:
    969 ; AVX512:       # %bb.0:
    970 ; AVX512-NEXT:    vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
    971 ; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
    972 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    973   %fcmp = fcmp ugt <2 x double> %a1, %a0
    974   %sext = sext <2 x i1> %fcmp to <2 x i64>
    975   %res = bitcast <2 x i64> %sext to <2 x double>
    976   ret <2 x double> %res
    977 }
    978 
    979 define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
    980 ; SSE-LABEL: test_mm_cmpnge_sd:
    981 ; SSE:       # %bb.0:
    982 ; SSE-NEXT:    cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
    983 ; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
    984 ; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
    985 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    986 ;
    987 ; AVX-LABEL: test_mm_cmpnge_sd:
    988 ; AVX:       # %bb.0:
    989 ; AVX-NEXT:    vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
    990 ; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
    991 ; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
    992 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    993   %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
    994   %ext0 = extractelement <2 x double> %cmp, i32 0
    995   %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
    996   %ext1 = extractelement <2 x double> %a0, i32 1
    997   %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
    998   ret <2 x double> %ins1
    999 }
   1000 
   1001 define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
   1002 ; SSE-LABEL: test_mm_cmpngt_pd:
   1003 ; SSE:       # %bb.0:
   1004 ; SSE-NEXT:    cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
   1005 ; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
   1006 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1007 ;
   1008 ; AVX1-LABEL: test_mm_cmpngt_pd:
   1009 ; AVX1:       # %bb.0:
   1010 ; AVX1-NEXT:    vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
   1011 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1012 ;
   1013 ; AVX512-LABEL: test_mm_cmpngt_pd:
   1014 ; AVX512:       # %bb.0:
   1015 ; AVX512-NEXT:    vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
   1016 ; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
   1017 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1018   %fcmp = fcmp uge <2 x double> %a1, %a0
   1019   %sext = sext <2 x i1> %fcmp to <2 x i64>
   1020   %res = bitcast <2 x i64> %sext to <2 x double>
   1021   ret <2 x double> %res
   1022 }
   1023 
   1024 define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
   1025 ; SSE-LABEL: test_mm_cmpngt_sd:
   1026 ; SSE:       # %bb.0:
   1027 ; SSE-NEXT:    cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
   1028 ; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
   1029 ; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
   1030 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1031 ;
   1032 ; AVX-LABEL: test_mm_cmpngt_sd:
   1033 ; AVX:       # %bb.0:
   1034 ; AVX-NEXT:    vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
   1035 ; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
   1036 ; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
   1037 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1038   %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
   1039   %ext0 = extractelement <2 x double> %cmp, i32 0
   1040   %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
   1041   %ext1 = extractelement <2 x double> %a0, i32 1
   1042   %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
   1043   ret <2 x double> %ins1
   1044 }
   1045 
   1046 define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
   1047 ; SSE-LABEL: test_mm_cmpnle_pd:
   1048 ; SSE:       # %bb.0:
   1049 ; SSE-NEXT:    cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
   1050 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1051 ;
   1052 ; AVX1-LABEL: test_mm_cmpnle_pd:
   1053 ; AVX1:       # %bb.0:
   1054 ; AVX1-NEXT:    vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
   1055 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1056 ;
   1057 ; AVX512-LABEL: test_mm_cmpnle_pd:
   1058 ; AVX512:       # %bb.0:
   1059 ; AVX512-NEXT:    vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
   1060 ; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
   1061 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1062   %fcmp = fcmp ugt <2 x double> %a0, %a1
   1063   %sext = sext <2 x i1> %fcmp to <2 x i64>
   1064   %res = bitcast <2 x i64> %sext to <2 x double>
   1065   ret <2 x double> %res
   1066 }

define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uge <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ord <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uno <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
  ret <2 x double> %res
}
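; NOTE: Predicate 7 is ORD (true iff neither input is NaN, i.e. fcmp ord) and
; predicate 3 is UNORD (fcmp uno). The scalar _sd variants are kept as
; llvm.x86.sse2.cmp.sd calls because they only compare the low lane and pass
; the upper lane of %a0 through unchanged.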

define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
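; NOTE: COMISD sets ZF, PF and CF all to 1 on an unordered result, so SETE by
; itself would also fire for NaN inputs. The SETNP/SETE/AND sequence above
; masks out that unordered case before the result is widened with MOVZBL.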

define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comige_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comile_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
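; NOTE: comile/comilt compare with the operands swapped (comisd %xmm0, %xmm1)
; so the result can still be read with SETAE/SETA: a <= b is the same as
; b >= a, and the CF-based conditions are the ones that correctly treat an
; unordered comparison as false.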

define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
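; NOTE: comineq is the dual of comieq: a NaN operand must compare not-equal,
; so SETNE is ORed with SETP here instead of SETE being ANDed with SETNP.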

define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
  %res = sitofp <2 x i32> %ext to <2 x double>
  ret <2 x double> %res
}
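; NOTE: CVTDQ2PD only converts the low two i32 elements, which the IR above
; models by shuffling lanes 0 and 1 into a <2 x i32> before the sitofp.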

define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = sitofp <4 x i32> %arg0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
  %res = fpext <2 x float> %ext to <2 x double>
  ret <2 x double> %res
}

define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsd_f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-SSE-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-SSE-NEXT:    movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24]
; X86-SSE-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX1-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX1-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX1-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX1-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX1-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX1-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_f64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX512-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX512-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX512-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX512-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX512-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX512-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtsd_f64:
; X64:       # %bb.0:
; X64-NEXT:    retq # encoding: [0xc3]
  %res = extractelement <2 x double> %a0, i32 0
  ret double %res
}
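; NOTE: The 32-bit calling convention returns a double in x87 st(0), so the
; X86 variants must spill the low lane of %xmm0 to an 8-byte-aligned stack
; slot and reload it with FLDL; on x86-64 the value is already in %xmm0 and
; the function reduces to a plain return.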

define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsd_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_cvtsd_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cvtsd_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
; X86-SSE-LABEL: test_mm_cvtsd_ss_load:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %p1
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}

define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsi128_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsi128_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsi128_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = extractelement <4 x i32> %arg0, i32 0
  ret i32 %res
}

define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtsi2sdl %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %cvt = sitofp i32 %a1 to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}
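; NOTE: On X86 the i32 argument already lives on the stack, so a single MOVSS
; load (which zeros the upper three lanes) builds the whole vector; on x86-64
; the value arrives in %edi and is moved into the vector with MOVD instead.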

define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cvtss_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttpd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttpd_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttpd_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttps_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttps_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttps_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttsd_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttsd_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttsd_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fdiv = fdiv double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fdiv, i32 0
  ret <2 x double> %res
}

define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_extract_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01]
; SSE-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_extract_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX1-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_extract_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX512-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext = extractelement <8 x i16> %arg0, i32 1
  %res = zext i16 %ext to i32
  ret i32 %res
}

define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
; X86-SSE-LABEL: test_mm_insert_epi16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT:    pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_insert_epi16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_insert_epi16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_insert_epi16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_insert_epi16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_insert_epi16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = insertelement <8 x i16> %arg0, i16 %a1, i32 1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
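; NOTE: The register form of PINSRW reads a 32-bit GPR, so on X86 the i16
; stack argument is first zero-extended with MOVZWL; only bits 15:0 of the
; register are actually inserted.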

define void @test_mm_lfence() nounwind {
; CHECK-LABEL: test_mm_lfence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lfence # encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind readnone

define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 16
  ret <2 x double> %res
}

define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0],zero
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0],zero
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a0, align 1
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res = load <2 x i64>, <2 x i64>* %a0, align 16
  ret <2 x i64> %res
}

define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0,0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero
; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0,0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a0, align 8
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double %ld, i32 1
  ret <2 x double> %res1
}
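; NOTE: Splatting one double without AVX takes two instructions (a MOVSD load
; plus MOVLHPS to copy lane 0 into lane 1); with AVX the load and broadcast
; fold into a single VMOVDDUP.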

define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadh_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 1
  ret <2 x double> %res
}

define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_epi64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_epi64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_epi64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_epi64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_epi64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0],zero
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_epi64:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0],zero
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %bc = bitcast <2 x i64>* %a1 to i64*
  %ld = load i64, i64* %bc, align 1
  %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],xmm0[1]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0],xmm0[1]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0],xmm0[1]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],xmm0[1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0],xmm0[1]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0],xmm0[1]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 0
  ret <2 x double> %res
}

define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT:    # xmm0 = xmm0[1,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX1-NEXT:    # xmm0 = mem[1,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadr_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX512-NEXT:    # xmm0 = mem[1,0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadr_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT:    # xmm0 = xmm0[1,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX1-NEXT:    # xmm0 = mem[1,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadr_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX512-NEXT:    # xmm0 = mem[1,0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %arg0, align 16
  %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}
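; NOTE: The reversed load is an aligned MOVAPD followed by SHUFPD $1 on SSE;
; with AVX the load folds into VPERMILPD $1, which swaps the two lanes
; directly from memory.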

define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 1
  ret <2 x double> %res
}
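; NOTE: the 'align 1' on the load above is what marks it unaligned and
; selects movups/vmovups rather than movapd. A C-level sketch of the
; equivalent (illustrative, not taken from the test):
;   __m128d v = _mm_loadu_pd(p); // p need not be 16-byte aligned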

define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res = load <2 x i64>, <2 x i64>* %a0, align 1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_madd_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_madd_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_madd_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
; X86-SSE-LABEL: test_mm_maskmoveu_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %edi # encoding: [0x57]
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X86-SSE-NEXT:    popl %edi # encoding: [0x5f]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_maskmoveu_si128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %edi # encoding: [0x57]
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-AVX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X86-AVX-NEXT:    popl %edi # encoding: [0x5f]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_maskmoveu_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_maskmoveu_si128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
  ret void
}
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
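; NOTE: maskmovdqu writes through an implicit pointer in %edi/%rdi, which is
; why the 32-bit code above must save %edi, load %a2 into it, and restore it
; afterwards, while the 64-bit code can use %rdi (already holding %a2, the
; first integer argument) directly.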

define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}
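; NOTE: following clang, this and the other integer min/max tests are
; written as the generic icmp+select idiom rather than as calls to target
; intrinsics; the checks verify that instruction selection re-fuses the
; idiom into a single pmaxsw/pmaxub/pminsw/pminub-style instruction.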

define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp ugt <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define void @test_mm_mfence() nounwind {
; CHECK-LABEL: test_mm_mfence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mfence # encoding: [0x0f,0xae,0xf0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind readnone
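; NOTE: mfence has no generic IR form, so the test calls the target
; intrinsic directly; the instruction orders all earlier loads and stores
; before any later ones. A C-level sketch (illustrative only):
;   _mm_mfence(); // full memory fence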

define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp slt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp ult <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
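; NOTE: unlike the integer min/max tests, the pd/sd tests keep the target
; intrinsics instead of an fcmp+select idiom: minpd/maxpd/minsd/maxsd are
; not commutative, returning the second operand when either input is NaN
; (and when comparing +0.0 with -0.0), semantics a generic IR idiom would
; not preserve.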

define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_move_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; SSE-NEXT:    # xmm0 = xmm0[0],zero
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_move_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX1-NEXT:    # xmm0 = xmm0[0],zero
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_move_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX512-NEXT:    # xmm0 = xmm0[0],zero
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_move_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_move_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; AVX-NEXT:    # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a1, i32 0
  %res0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
  ret <2 x double> %res1
}
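; NOTE: _mm_move_sd is expressed as a scalar extract/insert of the low
; elements. SSE selects movsd; the AVX targets instead select vblendps $3,
; which copies the low two 32-bit lanes (i.e. the low double) from %xmm1,
; a register-register blend generally being at least as cheap as vmovsd on
; recent cores.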

define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X86-SSE-LABEL: test_mm_mul_epu32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X86-SSE-NEXT:    # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X86-SSE-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-SSE-NEXT:    pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X86-SSE-NEXT:    pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X86-SSE-NEXT:    pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_epu32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; AVX1-NEXT:    vpblendw $204, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xcc]
; AVX1-NEXT:    # xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpblendw $204, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xcc]
; AVX1-NEXT:    # xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_epu32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT:    vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512-NEXT:    vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a]
; AVX512-NEXT:    # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_mul_epu32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X64-SSE-NEXT:    # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X64-SSE-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X64-SSE-NEXT:    pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X64-SSE-NEXT:    pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
  %A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
  %B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
  %res = mul nuw <2 x i64> %A, %B
  ret <2 x i64> %res
}
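; NOTE: _mm_mul_epu32 is written as the widening-multiply idiom: mask each
; input to its low 32 bits, then 'mul nuw' as <2 x i64>. SSE re-fuses this
; into pmuludq behind constant-pool pand masks; AVX1 zeroes the high halves
; with blends against a zeroed register; with AVX512DQ a full 64-bit vector
; multiply is legal, so vpmullq is used on the masked inputs instead.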

define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_mul_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fmul <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fmul = fmul double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fmul, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhi_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mulhi_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mulhi_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhi_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mulhi_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mulhi_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mullo_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mullo_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mullo_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = mul <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_or_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_or_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = or <2 x i64> %a0, %a1
  ret <2 x i64> %res
}
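; NOTE: for pure bitwise ops the integer and floating-point domains are
; interchangeable, and the SSE orps encoding ([0x0f,0x56,...]) is a byte
; shorter than por's (which needs the 0x66 prefix), so both the pd and
; si128 variants select orps/vorps here.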

define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packs_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packs_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packs_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packs_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packs_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packus_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packus_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packus_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
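; NOTE: the pack tests keep their target intrinsics because saturation
; (signed for packsswb/packssdw, unsigned for packuswb) has no single
; generic IR equivalent: each instruction narrows both source vectors and
; concatenates the results, clamping out-of-range values to the limits of
; the destination element type.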

define void @test_mm_pause() nounwind {
; CHECK-LABEL: test_mm_pause:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pause # encoding: [0xf3,0x90]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind readnone

define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sad_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sad_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sad_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
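; NOTE: psadbw sums absolute byte differences within each 64-bit half,
; producing one total zero-extended into each i64 element; the result is
; therefore already <2 x i64> and, unlike the other byte-element tests,
; needs no bitcast on return.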

define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X86-SSE-LABEL: test_mm_set_epi8:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X86-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X86-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_epi8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_epi8:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
; X86-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_epi8:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-SSE-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X64-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_epi8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX1-NEXT:    vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX1-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX1-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX1-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX1-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX1-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi8:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX512-NEXT:    vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX512-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX512-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX512-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX512-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX512-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <16 x i8> undef,  i8 %a15, i32 0
  %res1  = insertelement <16 x i8> %res0,  i8 %a14, i32 1
  %res2  = insertelement <16 x i8> %res1,  i8 %a13, i32 2
  %res3  = insertelement <16 x i8> %res2,  i8 %a12, i32 3
  %res4  = insertelement <16 x i8> %res3,  i8 %a11, i32 4
  %res5  = insertelement <16 x i8> %res4,  i8 %a10, i32 5
  %res6  = insertelement <16 x i8> %res5,  i8 %a9 , i32 6
  %res7  = insertelement <16 x i8> %res6,  i8 %a8 , i32 7
  %res8  = insertelement <16 x i8> %res7,  i8 %a7 , i32 8
  %res9  = insertelement <16 x i8> %res8,  i8 %a6 , i32 9
  %res10 = insertelement <16 x i8> %res9,  i8 %a5 , i32 10
  %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
  %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
  %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
  %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
  %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
  %res = bitcast <16 x i8> %res15 to <2 x i64>
  ret <2 x i64> %res
}
   3207 
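        ; test_mm_set_epi16: per the checks above/below, SSE lowers this to a tree of movd + punpcklwd/punpckldq/punpcklqdq merges, while AVX targets build the vector with vmovd followed by a vpinsrw chain.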
   3208 define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
   3209 ; X86-SSE-LABEL: test_mm_set_epi16:
   3210 ; X86-SSE:       # %bb.0:
   3211 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   3212 ; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
   3213 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
   3214 ; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
   3215 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
   3216 ; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
   3217 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
   3218 ; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
   3219 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
   3220 ; X86-SSE-NEXT:    movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
   3221 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
   3222 ; X86-SSE-NEXT:    movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
   3223 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
   3224 ; X86-SSE-NEXT:    movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
   3225 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
   3226 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3227 ; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
   3228 ; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
   3229 ; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3]
   3230 ; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   3231 ; X86-SSE-NEXT:    punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2]
   3232 ; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
   3233 ; X86-SSE-NEXT:    punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
   3234 ; X86-SSE-NEXT:    # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
   3235 ; X86-SSE-NEXT:    punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
   3236 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
   3237 ; X86-SSE-NEXT:    punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
   3238 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
   3239 ; X86-SSE-NEXT:    punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4]
   3240 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0]
   3241 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3242 ;
   3243 ; X86-AVX1-LABEL: test_mm_set_epi16:
   3244 ; X86-AVX1:       # %bb.0:
   3245 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
   3246 ; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
   3247 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
   3248 ; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   3249 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
   3250 ; X86-AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   3251 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
   3252 ; X86-AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
   3253 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
   3254 ; X86-AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   3255 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
   3256 ; X86-AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   3257 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
   3258 ; X86-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   3259 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   3260 ; X86-AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   3261 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3262 ;
   3263 ; X86-AVX512-LABEL: test_mm_set_epi16:
   3264 ; X86-AVX512:       # %bb.0:
   3265 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
   3266 ; X86-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
   3267 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
   3268 ; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   3269 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
   3270 ; X86-AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   3271 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
   3272 ; X86-AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
   3273 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
   3274 ; X86-AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   3275 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
   3276 ; X86-AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   3277 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
   3278 ; X86-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   3279 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   3280 ; X86-AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   3281 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3282 ;
   3283 ; X64-SSE-LABEL: test_mm_set_epi16:
   3284 ; X64-SSE:       # %bb.0:
   3285 ; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
   3286 ; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
   3287 ; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
   3288 ; X64-SSE-NEXT:    movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
   3289 ; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
   3290 ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   3291 ; X64-SSE-NEXT:    movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2]
   3292 ; X64-SSE-NEXT:    movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1]
   3293 ; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
   3294 ; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
   3295 ; X64-SSE-NEXT:    punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
   3296 ; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   3297 ; X64-SSE-NEXT:    movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0]
   3298 ; X64-SSE-NEXT:    movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9]
   3299 ; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
   3300 ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   3301 ; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
   3302 ; X64-SSE-NEXT:    movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2]
   3303 ; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
   3304 ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
   3305 ; X64-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
   3306 ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   3307 ; X64-SSE-NEXT:    punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
   3308 ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0]
   3309 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3310 ;
   3311 ; X64-AVX1-LABEL: test_mm_set_epi16:
   3312 ; X64-AVX1:       # %bb.0:
   3313 ; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
   3314 ; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
   3315 ; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
   3316 ; X64-AVX1-NEXT:    vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
   3317 ; X64-AVX1-NEXT:    vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
   3318 ; X64-AVX1-NEXT:    vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
   3319 ; X64-AVX1-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   3320 ; X64-AVX1-NEXT:    vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
   3321 ; X64-AVX1-NEXT:    vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
   3322 ; X64-AVX1-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   3323 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3324 ;
   3325 ; X64-AVX512-LABEL: test_mm_set_epi16:
   3326 ; X64-AVX512:       # %bb.0:
   3327 ; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
   3328 ; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
   3329 ; X64-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
   3330 ; X64-AVX512-NEXT:    vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
   3331 ; X64-AVX512-NEXT:    vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
   3332 ; X64-AVX512-NEXT:    vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
   3333 ; X64-AVX512-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   3334 ; X64-AVX512-NEXT:    vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
   3335 ; X64-AVX512-NEXT:    vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
   3336 ; X64-AVX512-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   3337 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3338   %res0  = insertelement <8 x i16> undef, i16 %a7, i32 0
   3339   %res1  = insertelement <8 x i16> %res0, i16 %a6, i32 1
   3340   %res2  = insertelement <8 x i16> %res1, i16 %a5, i32 2
   3341   %res3  = insertelement <8 x i16> %res2, i16 %a4, i32 3
   3342   %res4  = insertelement <8 x i16> %res3, i16 %a3, i32 4
   3343   %res5  = insertelement <8 x i16> %res4, i16 %a2, i32 5
   3344   %res6  = insertelement <8 x i16> %res5, i16 %a1, i32 6
   3345   %res7  = insertelement <8 x i16> %res6, i16 %a0, i32 7
   3346   %res = bitcast <8 x i16> %res7 to <2 x i64>
   3347   ret <2 x i64> %res
   3348 }
   3349 
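        ; test_mm_set_epi32: the X86-SSE path assembles the vector from stack loads with movss/unpcklps/movlhps, the X64-SSE path from GPRs with movd/punpckldq/punpcklqdq, and AVX targets use vmovd plus a vpinsrd chain.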
   3350 define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
   3351 ; X86-SSE-LABEL: test_mm_set_epi32:
   3352 ; X86-SSE:       # %bb.0:
   3353 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
   3354 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3355 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
   3356 ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
   3357 ; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
   3358 ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
   3359 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c]
   3360 ; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
   3361 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
   3362 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3363 ; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
   3364 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   3365 ; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
   3366 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
   3367 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3368 ;
   3369 ; X86-AVX1-LABEL: test_mm_set_epi32:
   3370 ; X86-AVX1:       # %bb.0:
   3371 ; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
   3372 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3373 ; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
   3374 ; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
   3375 ; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
   3376 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3377 ;
   3378 ; X86-AVX512-LABEL: test_mm_set_epi32:
   3379 ; X86-AVX512:       # %bb.0:
   3380 ; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
   3381 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3382 ; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
   3383 ; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
   3384 ; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
   3385 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3386 ;
   3387 ; X64-SSE-LABEL: test_mm_set_epi32:
   3388 ; X64-SSE:       # %bb.0:
   3389 ; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
   3390 ; X64-SSE-NEXT:    movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
   3391 ; X64-SSE-NEXT:    punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
   3392 ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
   3393 ; X64-SSE-NEXT:    movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2]
   3394 ; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
   3395 ; X64-SSE-NEXT:    punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
   3396 ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   3397 ; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
   3398 ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
   3399 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3400 ;
   3401 ; X64-AVX1-LABEL: test_mm_set_epi32:
   3402 ; X64-AVX1:       # %bb.0:
   3403 ; X64-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
   3404 ; X64-AVX1-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
   3405 ; X64-AVX1-NEXT:    vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
   3406 ; X64-AVX1-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
   3407 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3408 ;
   3409 ; X64-AVX512-LABEL: test_mm_set_epi32:
   3410 ; X64-AVX512:       # %bb.0:
   3411 ; X64-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
   3412 ; X64-AVX512-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
   3413 ; X64-AVX512-NEXT:    vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
   3414 ; X64-AVX512-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
   3415 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3416   %res0  = insertelement <4 x i32> undef, i32 %a3, i32 0
   3417   %res1  = insertelement <4 x i32> %res0, i32 %a2, i32 1
   3418   %res2  = insertelement <4 x i32> %res1, i32 %a1, i32 2
   3419   %res3  = insertelement <4 x i32> %res2, i32 %a0, i32 3
   3420   %res = bitcast <4 x i32> %res3 to <2 x i64>
   3421   ret <2 x i64> %res
   3422 }
   3423 
   3424 ; TODO: test_mm_set_epi64 (takes __m64 arguments)
   3425 
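        ; test_mm_set_epi64x: X64 targets move both GPRs into XMM registers and join them with (v)punpcklqdq; X86 targets have no 64-bit GPRs and assemble the value from four 32-bit stack slots.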
   3426 define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
   3427 ; X86-SSE-LABEL: test_mm_set_epi64x:
   3428 ; X86-SSE:       # %bb.0:
   3429 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
   3430 ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
   3431 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
   3432 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3433 ; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
   3434 ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
   3435 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c]
   3436 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3437 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10]
   3438 ; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
   3439 ; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
   3440 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   3441 ; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
   3442 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
   3443 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3444 ;
   3445 ; X86-AVX1-LABEL: test_mm_set_epi64x:
   3446 ; X86-AVX1:       # %bb.0:
   3447 ; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
   3448 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3449 ; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
   3450 ; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
   3451 ; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
   3452 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3453 ;
   3454 ; X86-AVX512-LABEL: test_mm_set_epi64x:
   3455 ; X86-AVX512:       # %bb.0:
   3456 ; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
   3457 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3458 ; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
   3459 ; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
   3460 ; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
   3461 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3462 ;
   3463 ; X64-SSE-LABEL: test_mm_set_epi64x:
   3464 ; X64-SSE:       # %bb.0:
   3465 ; X64-SSE-NEXT:    movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf]
   3466 ; X64-SSE-NEXT:    movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6]
   3467 ; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
   3468 ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
   3469 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3470 ;
   3471 ; X64-AVX1-LABEL: test_mm_set_epi64x:
   3472 ; X64-AVX1:       # %bb.0:
   3473 ; X64-AVX1-NEXT:    vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
   3474 ; X64-AVX1-NEXT:    vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
   3475 ; X64-AVX1-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
   3476 ; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
   3477 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3478 ;
   3479 ; X64-AVX512-LABEL: test_mm_set_epi64x:
   3480 ; X64-AVX512:       # %bb.0:
   3481 ; X64-AVX512-NEXT:    vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
   3482 ; X64-AVX512-NEXT:    vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
   3483 ; X64-AVX512-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
   3484 ; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
   3485 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3486   %res0  = insertelement <2 x i64> undef, i64 %a1, i32 0
   3487   %res1  = insertelement <2 x i64> %res0, i64 %a0, i32 1
   3488   ret <2 x i64> %res1
   3489 }
   3490 
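        ; test_mm_set_pd: the two doubles are packed with (v)movlhps; on X86 they are first reloaded from the stack with (v)movsd.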
   3491 define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
   3492 ; X86-SSE-LABEL: test_mm_set_pd:
   3493 ; X86-SSE:       # %bb.0:
   3494 ; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c]
   3495 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero
   3496 ; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04]
   3497 ; X86-SSE-NEXT:    # xmm1 = mem[0],zero
   3498 ; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
   3499 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
   3500 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3501 ;
   3502 ; X86-AVX1-LABEL: test_mm_set_pd:
   3503 ; X86-AVX1:       # %bb.0:
   3504 ; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
   3505 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
   3506 ; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
   3507 ; X86-AVX1-NEXT:    # xmm1 = mem[0],zero
   3508 ; X86-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
   3509 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
   3510 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3511 ;
   3512 ; X86-AVX512-LABEL: test_mm_set_pd:
   3513 ; X86-AVX512:       # %bb.0:
   3514 ; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
   3515 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
   3516 ; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
   3517 ; X86-AVX512-NEXT:    # xmm1 = mem[0],zero
   3518 ; X86-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
   3519 ; X86-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
   3520 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3521 ;
   3522 ; X64-SSE-LABEL: test_mm_set_pd:
   3523 ; X64-SSE:       # %bb.0:
   3524 ; X64-SSE-NEXT:    movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
   3525 ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0]
   3526 ; X64-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
   3527 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3528 ;
   3529 ; X64-AVX1-LABEL: test_mm_set_pd:
   3530 ; X64-AVX1:       # %bb.0:
   3531 ; X64-AVX1-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
   3532 ; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
   3533 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3534 ;
   3535 ; X64-AVX512-LABEL: test_mm_set_pd:
   3536 ; X64-AVX512:       # %bb.0:
   3537 ; X64-AVX512-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
   3538 ; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
   3539 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3540   %res0  = insertelement <2 x double> undef, double %a1, i32 0
   3541   %res1  = insertelement <2 x double> %res0, double %a0, i32 1
   3542   ret <2 x double> %res1
   3543 }
   3544 
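        ; test_mm_set_pd1: splats the scalar; SSE uses movlhps with the same source and destination, AVX uses vmovddup.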
   3545 define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
   3546 ; X86-SSE-LABEL: test_mm_set_pd1:
   3547 ; X86-SSE:       # %bb.0:
   3548 ; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
   3549 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero
   3550 ; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   3551 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
   3552 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3553 ;
   3554 ; X86-AVX1-LABEL: test_mm_set_pd1:
   3555 ; X86-AVX1:       # %bb.0:
   3556 ; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
   3557 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
   3558 ; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   3559 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   3560 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3561 ;
   3562 ; X86-AVX512-LABEL: test_mm_set_pd1:
   3563 ; X86-AVX512:       # %bb.0:
   3564 ; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
   3565 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
   3566 ; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   3567 ; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   3568 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3569 ;
   3570 ; X64-SSE-LABEL: test_mm_set_pd1:
   3571 ; X64-SSE:       # %bb.0:
   3572 ; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   3573 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
   3574 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3575 ;
   3576 ; X64-AVX1-LABEL: test_mm_set_pd1:
   3577 ; X64-AVX1:       # %bb.0:
   3578 ; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   3579 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   3580 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3581 ;
   3582 ; X64-AVX512-LABEL: test_mm_set_pd1:
   3583 ; X64-AVX512:       # %bb.0:
   3584 ; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   3585 ; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   3586 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3587   %res0  = insertelement <2 x double> undef, double %a0, i32 0
   3588   %res1  = insertelement <2 x double> %res0, double %a0, i32 1
   3589   ret <2 x double> %res1
   3590 }
   3591 
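        ; test_mm_set_sd: (v)movq keeps the low double and zeroes the upper element.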
   3592 define <2 x double> @test_mm_set_sd(double %a0) nounwind {
   3593 ; X86-SSE-LABEL: test_mm_set_sd:
   3594 ; X86-SSE:       # %bb.0:
   3595 ; X86-SSE-NEXT:    movq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04]
   3596 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero
   3597 ; X86-SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
   3598 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],zero
   3599 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3600 ;
   3601 ; X86-AVX1-LABEL: test_mm_set_sd:
   3602 ; X86-AVX1:       # %bb.0:
   3603 ; X86-AVX1-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
   3604 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
   3605 ; X86-AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
   3606 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0],zero
   3607 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3608 ;
   3609 ; X86-AVX512-LABEL: test_mm_set_sd:
   3610 ; X86-AVX512:       # %bb.0:
   3611 ; X86-AVX512-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
   3612 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
   3613 ; X86-AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
   3614 ; X86-AVX512-NEXT:    # xmm0 = xmm0[0],zero
   3615 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3616 ;
   3617 ; X64-SSE-LABEL: test_mm_set_sd:
   3618 ; X64-SSE:       # %bb.0:
   3619 ; X64-SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
   3620 ; X64-SSE-NEXT:    # xmm0 = xmm0[0],zero
   3621 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3622 ;
   3623 ; X64-AVX1-LABEL: test_mm_set_sd:
   3624 ; X64-AVX1:       # %bb.0:
   3625 ; X64-AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
   3626 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0],zero
   3627 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3628 ;
   3629 ; X64-AVX512-LABEL: test_mm_set_sd:
   3630 ; X64-AVX512:       # %bb.0:
   3631 ; X64-AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
   3632 ; X64-AVX512-NEXT:    # xmm0 = xmm0[0],zero
   3633 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3634   %res0  = insertelement <2 x double> undef, double %a0, i32 0
   3635   %res1  = insertelement <2 x double> %res0, double 0.0, i32 1
   3636   ret <2 x double> %res1
   3637 }
   3638 
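        ; test_mm_set1_epi8: byte splats lower to punpcklbw + pshuflw + pshufd on SSE, a vpshufb with an all-zero shuffle mask on AVX1, and a single vpbroadcastb on AVX512.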
   3639 define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
   3640 ; X86-SSE-LABEL: test_mm_set1_epi8:
   3641 ; X86-SSE:       # %bb.0:
   3642 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3643 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3644 ; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
   3645 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   3646 ; X86-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
   3647 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
   3648 ; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
   3649 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3650 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3651 ;
   3652 ; X86-AVX1-LABEL: test_mm_set1_epi8:
   3653 ; X86-AVX1:       # %bb.0:
   3654 ; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3655 ; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
   3656 ; X86-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
   3657 ; X86-AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
   3658 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3659 ;
   3660 ; X86-AVX512-LABEL: test_mm_set1_epi8:
   3661 ; X86-AVX512:       # %bb.0:
   3662 ; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
   3663 ; X86-AVX512-NEXT:    vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
   3664 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3665 ;
   3666 ; X64-SSE-LABEL: test_mm_set1_epi8:
   3667 ; X64-SSE:       # %bb.0:
   3668 ; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
   3669 ; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3670 ; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
   3671 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   3672 ; X64-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
   3673 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
   3674 ; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
   3675 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3676 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3677 ;
   3678 ; X64-AVX1-LABEL: test_mm_set1_epi8:
   3679 ; X64-AVX1:       # %bb.0:
   3680 ; X64-AVX1-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
   3681 ; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
   3682 ; X64-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
   3683 ; X64-AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
   3684 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3685 ;
   3686 ; X64-AVX512-LABEL: test_mm_set1_epi8:
   3687 ; X64-AVX512:       # %bb.0:
   3688 ; X64-AVX512-NEXT:    vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
   3689 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3690   %res0  = insertelement <16 x i8> undef,  i8 %a0, i32 0
   3691   %res1  = insertelement <16 x i8> %res0,  i8 %a0, i32 1
   3692   %res2  = insertelement <16 x i8> %res1,  i8 %a0, i32 2
   3693   %res3  = insertelement <16 x i8> %res2,  i8 %a0, i32 3
   3694   %res4  = insertelement <16 x i8> %res3,  i8 %a0, i32 4
   3695   %res5  = insertelement <16 x i8> %res4,  i8 %a0, i32 5
   3696   %res6  = insertelement <16 x i8> %res5,  i8 %a0, i32 6
   3697   %res7  = insertelement <16 x i8> %res6,  i8 %a0, i32 7
   3698   %res8  = insertelement <16 x i8> %res7,  i8 %a0, i32 8
   3699   %res9  = insertelement <16 x i8> %res8,  i8 %a0, i32 9
   3700   %res10 = insertelement <16 x i8> %res9,  i8 %a0, i32 10
   3701   %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
   3702   %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
   3703   %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
   3704   %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
   3705   %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
   3706   %res = bitcast <16 x i8> %res15 to <2 x i64>
   3707   ret <2 x i64> %res
   3708 }
   3709 
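        ; test_mm_set1_epi16: word splats use (v)pshuflw + (v)pshufd $0 on SSE/AVX1 and vpbroadcastw on AVX512.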
   3710 define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
   3711 ; X86-SSE-LABEL: test_mm_set1_epi16:
   3712 ; X86-SSE:       # %bb.0:
   3713 ; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   3714 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3715 ; X86-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
   3716 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
   3717 ; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
   3718 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3719 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3720 ;
   3721 ; X86-AVX1-LABEL: test_mm_set1_epi16:
   3722 ; X86-AVX1:       # %bb.0:
   3723 ; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   3724 ; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
   3725 ; X86-AVX1-NEXT:    vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0]
   3726 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
   3727 ; X86-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
   3728 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3729 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3730 ;
   3731 ; X86-AVX512-LABEL: test_mm_set1_epi16:
   3732 ; X86-AVX512:       # %bb.0:
   3733 ; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   3734 ; X86-AVX512-NEXT:    vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0]
   3735 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3736 ;
   3737 ; X64-SSE-LABEL: test_mm_set1_epi16:
   3738 ; X64-SSE:       # %bb.0:
   3739 ; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
   3740 ; X64-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
   3741 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
   3742 ; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
   3743 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3744 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3745 ;
   3746 ; X64-AVX1-LABEL: test_mm_set1_epi16:
   3747 ; X64-AVX1:       # %bb.0:
   3748 ; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
   3749 ; X64-AVX1-NEXT:    vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0]
   3750 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
   3751 ; X64-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
   3752 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3753 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3754 ;
   3755 ; X64-AVX512-LABEL: test_mm_set1_epi16:
   3756 ; X64-AVX512:       # %bb.0:
   3757 ; X64-AVX512-NEXT:    vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
   3758 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3759   %res0  = insertelement <8 x i16> undef, i16 %a0, i32 0
   3760   %res1  = insertelement <8 x i16> %res0, i16 %a0, i32 1
   3761   %res2  = insertelement <8 x i16> %res1, i16 %a0, i32 2
   3762   %res3  = insertelement <8 x i16> %res2, i16 %a0, i32 3
   3763   %res4  = insertelement <8 x i16> %res3, i16 %a0, i32 4
   3764   %res5  = insertelement <8 x i16> %res4, i16 %a0, i32 5
   3765   %res6  = insertelement <8 x i16> %res5, i16 %a0, i32 6
   3766   %res7  = insertelement <8 x i16> %res6, i16 %a0, i32 7
   3767   %res = bitcast <8 x i16> %res7 to <2 x i64>
   3768   ret <2 x i64> %res
   3769 }
   3770 
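        ; test_mm_set1_epi32: dword splats are a single pshufd $0 (vpermilps $0 on X86-AVX1) or a vpbroadcastd on AVX512.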
   3771 define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
   3772 ; X86-SSE-LABEL: test_mm_set1_epi32:
   3773 ; X86-SSE:       # %bb.0:
   3774 ; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
   3775 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3776 ; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
   3777 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3778 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3779 ;
   3780 ; X86-AVX1-LABEL: test_mm_set1_epi32:
   3781 ; X86-AVX1:       # %bb.0:
   3782 ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
   3783 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3784 ; X86-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
   3785 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3786 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3787 ;
   3788 ; X86-AVX512-LABEL: test_mm_set1_epi32:
   3789 ; X86-AVX512:       # %bb.0:
   3790 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3791 ; X86-AVX512-NEXT:    vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0]
   3792 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3793 ;
   3794 ; X64-SSE-LABEL: test_mm_set1_epi32:
   3795 ; X64-SSE:       # %bb.0:
   3796 ; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
   3797 ; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
   3798 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3799 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3800 ;
   3801 ; X64-AVX1-LABEL: test_mm_set1_epi32:
   3802 ; X64-AVX1:       # %bb.0:
   3803 ; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
   3804 ; X64-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
   3805 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
   3806 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3807 ;
   3808 ; X64-AVX512-LABEL: test_mm_set1_epi32:
   3809 ; X64-AVX512:       # %bb.0:
   3810 ; X64-AVX512-NEXT:    vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
   3811 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3812   %res0  = insertelement <4 x i32> undef, i32 %a0, i32 0
   3813   %res1  = insertelement <4 x i32> %res0, i32 %a0, i32 1
   3814   %res2  = insertelement <4 x i32> %res1, i32 %a0, i32 2
   3815   %res3  = insertelement <4 x i32> %res2, i32 %a0, i32 3
   3816   %res = bitcast <4 x i32> %res3 to <2 x i64>
   3817   ret <2 x i64> %res
   3818 }
   3819 
   3820 ; TODO: test_mm_set1_epi64 (takes an __m64 argument)
   3821 
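        ; test_mm_set1_epi64x: the qword is splatted with (v)pshufd $68 (i.e. [0,1,0,1]) on SSE and X64-AVX1, rebuilt with repeated vpinsrd on X86-AVX1, and broadcast with vpbroadcastq on AVX512 (directly from %rdi on X64).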
   3822 define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
   3823 ; X86-SSE-LABEL: test_mm_set1_epi64x:
   3824 ; X86-SSE:       # %bb.0:
   3825 ; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
   3826 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3827 ; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm1 # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08]
   3828 ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
   3829 ; X86-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
   3830 ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   3831 ; X86-SSE-NEXT:    pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
   3832 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,1,0,1]
   3833 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3834 ;
   3835 ; X86-AVX1-LABEL: test_mm_set1_epi64x:
   3836 ; X86-AVX1:       # %bb.0:
   3837 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   3838 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
   3839 ; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
   3840 ; X86-AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
   3841 ; X86-AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
   3842 ; X86-AVX1-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
   3843 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3844 ;
   3845 ; X86-AVX512-LABEL: test_mm_set1_epi64x:
   3846 ; X86-AVX512:       # %bb.0:
   3847 ; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
   3848 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
   3849 ; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
   3850 ; X86-AVX512-NEXT:    vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
   3851 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3852 ;
   3853 ; X64-SSE-LABEL: test_mm_set1_epi64x:
   3854 ; X64-SSE:       # %bb.0:
   3855 ; X64-SSE-NEXT:    movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
   3856 ; X64-SSE-NEXT:    pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
   3857 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,1,0,1]
   3858 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3859 ;
   3860 ; X64-AVX1-LABEL: test_mm_set1_epi64x:
   3861 ; X64-AVX1:       # %bb.0:
   3862 ; X64-AVX1-NEXT:    vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
   3863 ; X64-AVX1-NEXT:    vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
   3864 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1,0,1]
   3865 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3866 ;
   3867 ; X64-AVX512-LABEL: test_mm_set1_epi64x:
   3868 ; X64-AVX512:       # %bb.0:
   3869 ; X64-AVX512-NEXT:    vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
   3870 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3871   %res0  = insertelement <2 x i64> undef, i64 %a0, i32 0
   3872   %res1  = insertelement <2 x i64> %res0, i64 %a0, i32 1
   3873   ret <2 x i64> %res1
   3874 }
   3875 
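        ; test_mm_set1_pd: same splat pattern as test_mm_set_pd1 above: movlhps on SSE, vmovddup on AVX.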
   3876 define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
   3877 ; X86-SSE-LABEL: test_mm_set1_pd:
   3878 ; X86-SSE:       # %bb.0:
   3879 ; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
   3880 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero
   3881 ; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   3882 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
   3883 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   3884 ;
   3885 ; X86-AVX1-LABEL: test_mm_set1_pd:
   3886 ; X86-AVX1:       # %bb.0:
   3887 ; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
   3888 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
   3889 ; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   3890 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   3891 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   3892 ;
   3893 ; X86-AVX512-LABEL: test_mm_set1_pd:
   3894 ; X86-AVX512:       # %bb.0:
   3895 ; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
   3896 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
   3897 ; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   3898 ; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   3899 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   3900 ;
   3901 ; X64-SSE-LABEL: test_mm_set1_pd:
   3902 ; X64-SSE:       # %bb.0:
   3903 ; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   3904 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
   3905 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   3906 ;
   3907 ; X64-AVX1-LABEL: test_mm_set1_pd:
   3908 ; X64-AVX1:       # %bb.0:
   3909 ; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   3910 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   3911 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   3912 ;
   3913 ; X64-AVX512-LABEL: test_mm_set1_pd:
   3914 ; X64-AVX512:       # %bb.0:
   3915 ; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   3916 ; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   3917 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   3918   %res0  = insertelement <2 x double> undef, double %a0, i32 0
   3919   %res1  = insertelement <2 x double> %res0, double %a0, i32 1
   3920   ret <2 x double> %res1
   3921 }
   3922 
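        ; test_mm_setr_epi8: same lowering strategy as test_mm_set_epi8, but the arguments are inserted in argument order (element 0 first).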
   3923 define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
   3924 ; X86-SSE-LABEL: test_mm_setr_epi8:
   3925 ; X86-SSE:       # %bb.0:
   3926 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
   3927 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3928 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
   3929 ; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
   3930 ; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
   3931 ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   3932 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
   3933 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3934 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
   3935 ; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
   3936 ; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
   3937 ; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
   3938 ; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
   3939 ; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
   3940 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
   3941 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3942 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
   3943 ; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
   3944 ; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
   3945 ; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
   3946 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
   3947 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3948 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
   3949 ; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
   3950 ; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
   3951 ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   3952 ; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
   3953 ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   3954 ; X86-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
   3955 ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   3956 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
   3957 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3958 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
   3959 ; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
   3960 ; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
   3961 ; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
   3962 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
   3963 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3964 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
   3965 ; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
   3966 ; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
   3967 ; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
   3968 ; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
   3969 ; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   3970 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
   3971 ; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
   3972 ; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
   3973 ; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X86-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi8:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi8:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X64-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X64-SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX1-NEXT:    movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
; X64-AVX1-NEXT:    vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX1-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX1-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX1-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi8:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX512-NEXT:    movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
; X64-AVX512-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX512-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX512-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX512-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <16 x i8> undef,  i8 %a0 , i32 0
  %res1  = insertelement <16 x i8> %res0,  i8 %a1 , i32 1
  %res2  = insertelement <16 x i8> %res1,  i8 %a2 , i32 2
  %res3  = insertelement <16 x i8> %res2,  i8 %a3 , i32 3
  %res4  = insertelement <16 x i8> %res3,  i8 %a4 , i32 4
  %res5  = insertelement <16 x i8> %res4,  i8 %a5 , i32 5
  %res6  = insertelement <16 x i8> %res5,  i8 %a6 , i32 6
  %res7  = insertelement <16 x i8> %res6,  i8 %a7 , i32 7
  %res8  = insertelement <16 x i8> %res7,  i8 %a8 , i32 8
  %res9  = insertelement <16 x i8> %res8,  i8 %a9 , i32 9
  %res10 = insertelement <16 x i8> %res9,  i8 %a10, i32 10
  %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
  %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
  %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
  %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
  %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
  %res = bitcast <16 x i8> %res15 to <2 x i64>
  ret <2 x i64> %res
}

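; Note: for the i16 variant the SSE2 lowering builds the vector from GPR movd
; transfers plus a punpcklwd/punpckldq/punpcklqdq merge tree rather than pinsrw,
; while the AVX targets insert each element directly with vpinsrw.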
define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-SSE-NEXT:    movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-SSE-NEXT:    movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-SSE-NEXT:    movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3]
; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; X86-SSE-NEXT:    punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2]
; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT:    punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
; X86-SSE-NEXT:    # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X86-SSE-NEXT:    punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
; X86-SSE-NEXT:    punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
; X86-SSE-NEXT:    punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca]
; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT:    movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1]
; X64-SSE-NEXT:    movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; X64-SSE-NEXT:    punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
; X64-SSE-NEXT:    movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT:    movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde]
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; X64-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT:    punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
; X64-AVX1-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
; X64-AVX1-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
; X64-AVX1-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
; X64-AVX1-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
; X64-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-AVX1-NEXT:    vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
; X64-AVX512-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
; X64-AVX512-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
; X64-AVX512-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
; X64-AVX512-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
; X64-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-AVX512-NEXT:    vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <8 x i16> undef, i16 %a0, i32 0
  %res1  = insertelement <8 x i16> %res0, i16 %a1, i32 1
  %res2  = insertelement <8 x i16> %res1, i16 %a2, i32 2
  %res3  = insertelement <8 x i16> %res2, i16 %a3, i32 3
  %res4  = insertelement <8 x i16> %res3, i16 %a4, i32 4
  %res5  = insertelement <8 x i16> %res4, i16 %a5, i32 5
  %res6  = insertelement <8 x i16> %res5, i16 %a6, i32 6
  %res7  = insertelement <8 x i16> %res6, i16 %a7, i32 7
  %res = bitcast <8 x i16> %res7 to <2 x i64>
  ret <2 x i64> %res
}

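; Note: on X86 all four i32 arguments live on the stack, so SSE loads them with
; movss and merges with unpcklps/movlhps; on X64 they arrive in GPRs and are
; combined via movd/punpckldq (SSE) or vpinsrd (AVX).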
define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
; X64-SSE-NEXT:    movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
; X64-SSE-NEXT:    punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT:    movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
; X64-AVX1-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; X64-AVX1-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi32:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
; X64-AVX512-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; X64-AVX512-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1  = insertelement <4 x i32> %res0, i32 %a1, i32 1
  %res2  = insertelement <4 x i32> %res1, i32 %a2, i32 2
  %res3  = insertelement <4 x i32> %res2, i32 %a3, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

; TODO test_mm_setr_epi64

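; Note: on X86 each i64 argument is passed as two 32-bit stack slots, so the
; vector is assembled from four dword loads; on X64 a movq of each GPR plus one
; punpcklqdq suffices.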
define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi64x:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi64x:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi64x:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi64x:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
; X64-SSE-NEXT:    movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi64x:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX1-NEXT:    vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX1-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi64x:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX512-NEXT:    vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX512-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x i64> undef, i64 %a0, i32 0
  %res1  = insertelement <2 x i64> %res0, i64 %a1, i32 1
  ret <2 x i64> %res1
}

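; Note: _mm_setr_pd keeps its arguments in memory order, so on X64 a single
; (v)movlhps combines the two incoming XMM registers with no swap.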
define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
; X86-SSE-LABEL: test_mm_setr_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT:    # xmm1 = mem[0],zero
; X86-AVX1-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
; X86-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT:    # xmm1 = mem[0],zero
; X86-AVX512-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
; X86-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x double> undef, double %a0, i32 0
  %res1  = insertelement <2 x double> %res0, double %a1, i32 1
  ret <2 x double> %res1
}

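; Both setzero tests lower to a register self-xor, the canonical zero idiom, so
; a single check block per feature level covers X86 and X64.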
define <2 x double> @test_mm_setzero_pd() {
; SSE-LABEL: test_mm_setzero_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_setzero_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_setzero_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x double> zeroinitializer
}

define <2 x i64> @test_mm_setzero_si128() {
; SSE-LABEL: test_mm_setzero_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_setzero_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_setzero_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x i64> zeroinitializer
}

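; Note: a zero shuffle immediate splats element 0, which AVX512 recognizes and
; lowers to vbroadcastss instead of an explicit shuffle.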
define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_shuffle_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shuffle_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_shuffle_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01]
; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shuffle_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_shufflehi_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00]
; SSE-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shufflehi_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00]
; AVX1-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shufflehi_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00]
; AVX512-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_shufflelo_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
; SSE-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shufflelo_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
; AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shufflelo_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00]
; AVX512-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

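; The _mm_sll_* tests shift by the count held in the low 64 bits of the second
; operand, so they select the psll[wdq] register forms; the immediate forms are
; exercised by the _mm_slli_* tests below.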
define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sll_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sll_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sll_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sll_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sll_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sll_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sll_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sll_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sll_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

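; The _mm_slli_* intrinsics take an immediate count, which is encoded directly
; in the instruction (here $1).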
define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone

define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone

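; Note: _mm_slli_si128 is a whole-register byte shift, modeled in IR as a
; shufflevector reading from a zero vector; it lowers to a single pslldq.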
define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_slli_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
; SSE-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX1-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX512-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_sqrt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone

   4858 define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
   4859 ; SSE-LABEL: test_mm_sqrt_sd:
   4860 ; SSE:       # %bb.0:
   4861 ; SSE-NEXT:    sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8]
   4862 ; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
   4863 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4864 ;
   4865 ; AVX1-LABEL: test_mm_sqrt_sd:
   4866 ; AVX1:       # %bb.0:
   4867 ; AVX1-NEXT:    vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0]
   4868 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4869 ;
   4870 ; AVX512-LABEL: test_mm_sqrt_sd:
   4871 ; AVX512:       # %bb.0:
   4872 ; AVX512-NEXT:    vsqrtsd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf3,0x51,0xc0]
   4873 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4874   %ext = extractelement <2 x double> %a0, i32 0
   4875   %sqrt = call double @llvm.sqrt.f64(double %ext)
   4876   %ins = insertelement <2 x double> %a1, double %sqrt, i32 0
   4877   ret <2 x double> %ins
   4878 }
   4879 declare double @llvm.sqrt.f64(double) nounwind readnone
   4880 
   4881 ; This doesn't match a clang test, but helps with fast-isel coverage.
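; On i386 a double is returned in st(0), which is why the X86 variants below spill
; the result to the stack and reload it with fldl.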
   4882 define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
   4883 ; X86-SSE-LABEL: test_mm_sqrt_sd_scalar:
   4884 ; X86-SSE:       # %bb.0:
   4885 ; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55]
   4886 ; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
   4887 ; X86-SSE-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
   4888 ; X86-SSE-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
   4889 ; X86-SSE-NEXT:    movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08]
   4890 ; X86-SSE-NEXT:    # xmm0 = mem[0],zero
   4891 ; X86-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
   4892 ; X86-SSE-NEXT:    movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24]
   4893 ; X86-SSE-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
   4894 ; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
   4895 ; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d]
   4896 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   4897 ;
   4898 ; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar:
   4899 ; X86-AVX1:       # %bb.0:
   4900 ; X86-AVX1-NEXT:    pushl %ebp # encoding: [0x55]
   4901 ; X86-AVX1-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
   4902 ; X86-AVX1-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
   4903 ; X86-AVX1-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
   4904 ; X86-AVX1-NEXT:    vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08]
   4905 ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
   4906 ; X86-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
   4907 ; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24]
   4908 ; X86-AVX1-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
   4909 ; X86-AVX1-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
   4910 ; X86-AVX1-NEXT:    popl %ebp # encoding: [0x5d]
   4911 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   4912 ;
   4913 ; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar:
   4914 ; X86-AVX512:       # %bb.0:
   4915 ; X86-AVX512-NEXT:    pushl %ebp # encoding: [0x55]
   4916 ; X86-AVX512-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
   4917 ; X86-AVX512-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
   4918 ; X86-AVX512-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
   4919 ; X86-AVX512-NEXT:    vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08]
   4920 ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
   4921 ; X86-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
   4922 ; X86-AVX512-NEXT:    vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24]
   4923 ; X86-AVX512-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
   4924 ; X86-AVX512-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
   4925 ; X86-AVX512-NEXT:    popl %ebp # encoding: [0x5d]
   4926 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   4927 ;
   4928 ; X64-SSE-LABEL: test_mm_sqrt_sd_scalar:
   4929 ; X64-SSE:       # %bb.0:
   4930 ; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
   4931 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   4932 ;
   4933 ; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
   4934 ; X64-AVX1:       # %bb.0:
   4935 ; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
   4936 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   4937 ;
   4938 ; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
   4939 ; X64-AVX512:       # %bb.0:
   4940 ; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
   4941 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   4942   %sqrt = call double @llvm.sqrt.f64(double %a0)
   4943   ret double %sqrt
   4944 }
   4945 
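; For the psra/psrl register forms the shift count is taken from the low 64 bits of
; the second operand; psra* shifts arithmetically (replicating the sign bit) while
; psrl* shifts in zeros.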
   4946 define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
   4947 ; SSE-LABEL: test_mm_sra_epi16:
   4948 ; SSE:       # %bb.0:
   4949 ; SSE-NEXT:    psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1]
   4950 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4951 ;
   4952 ; AVX1-LABEL: test_mm_sra_epi16:
   4953 ; AVX1:       # %bb.0:
   4954 ; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1]
   4955 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4956 ;
   4957 ; AVX512-LABEL: test_mm_sra_epi16:
   4958 ; AVX512:       # %bb.0:
   4959 ; AVX512-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
   4960 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4961   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   4962   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   4963   %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
   4964   %bc = bitcast <8 x i16> %res to <2 x i64>
   4965   ret <2 x i64> %bc
   4966 }
   4967 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
   4968 
   4969 define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
   4970 ; SSE-LABEL: test_mm_sra_epi32:
   4971 ; SSE:       # %bb.0:
   4972 ; SSE-NEXT:    psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1]
   4973 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4974 ;
   4975 ; AVX1-LABEL: test_mm_sra_epi32:
   4976 ; AVX1:       # %bb.0:
   4977 ; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1]
   4978 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4979 ;
   4980 ; AVX512-LABEL: test_mm_sra_epi32:
   4981 ; AVX512:       # %bb.0:
   4982 ; AVX512-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
   4983 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4984   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   4985   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   4986   %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
   4987   %bc = bitcast <4 x i32> %res to <2 x i64>
   4988   ret <2 x i64> %bc
   4989 }
   4990 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
   4991 
   4992 define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
   4993 ; SSE-LABEL: test_mm_srai_epi16:
   4994 ; SSE:       # %bb.0:
   4995 ; SSE-NEXT:    psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01]
   4996 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4997 ;
   4998 ; AVX1-LABEL: test_mm_srai_epi16:
   4999 ; AVX1:       # %bb.0:
   5000 ; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01]
   5001 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5002 ;
   5003 ; AVX512-LABEL: test_mm_srai_epi16:
   5004 ; AVX512:       # %bb.0:
   5005 ; AVX512-NEXT:    vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01]
   5006 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5007   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   5008   %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
   5009   %bc = bitcast <8 x i16> %res to <2 x i64>
   5010   ret <2 x i64> %bc
   5011 }
   5012 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
   5013 
   5014 define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
   5015 ; SSE-LABEL: test_mm_srai_epi32:
   5016 ; SSE:       # %bb.0:
   5017 ; SSE-NEXT:    psrad $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01]
   5018 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5019 ;
   5020 ; AVX1-LABEL: test_mm_srai_epi32:
   5021 ; AVX1:       # %bb.0:
   5022 ; AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xe0,0x01]
   5023 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5024 ;
   5025 ; AVX512-LABEL: test_mm_srai_epi32:
   5026 ; AVX512:       # %bb.0:
   5027 ; AVX512-NEXT:    vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01]
   5028 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5029   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   5030   %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
   5031   %bc = bitcast <4 x i32> %res to <2 x i64>
   5032   ret <2 x i64> %bc
   5033 }
   5034 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
   5035 
   5036 define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
   5037 ; SSE-LABEL: test_mm_srl_epi16:
   5038 ; SSE:       # %bb.0:
   5039 ; SSE-NEXT:    psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1]
   5040 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5041 ;
   5042 ; AVX1-LABEL: test_mm_srl_epi16:
   5043 ; AVX1:       # %bb.0:
   5044 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1]
   5045 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5046 ;
   5047 ; AVX512-LABEL: test_mm_srl_epi16:
   5048 ; AVX512:       # %bb.0:
   5049 ; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
   5050 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5051   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   5052   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   5053   %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
   5054   %bc = bitcast <8 x i16> %res to <2 x i64>
   5055   ret <2 x i64> %bc
   5056 }
   5057 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
   5058 
   5059 define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
   5060 ; SSE-LABEL: test_mm_srl_epi32:
   5061 ; SSE:       # %bb.0:
   5062 ; SSE-NEXT:    psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1]
   5063 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5064 ;
   5065 ; AVX1-LABEL: test_mm_srl_epi32:
   5066 ; AVX1:       # %bb.0:
   5067 ; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1]
   5068 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5069 ;
   5070 ; AVX512-LABEL: test_mm_srl_epi32:
   5071 ; AVX512:       # %bb.0:
   5072 ; AVX512-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
   5073 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5074   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   5075   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   5076   %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
   5077   %bc = bitcast <4 x i32> %res to <2 x i64>
   5078   ret <2 x i64> %bc
   5079 }
   5080 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
   5081 
   5082 define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
   5083 ; SSE-LABEL: test_mm_srl_epi64:
   5084 ; SSE:       # %bb.0:
   5085 ; SSE-NEXT:    psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1]
   5086 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5087 ;
   5088 ; AVX1-LABEL: test_mm_srl_epi64:
   5089 ; AVX1:       # %bb.0:
   5090 ; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1]
   5091 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5092 ;
   5093 ; AVX512-LABEL: test_mm_srl_epi64:
   5094 ; AVX512:       # %bb.0:
   5095 ; AVX512-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
   5096 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5097   %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
   5098   ret <2 x i64> %res
   5099 }
   5100 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
   5101 
   5102 define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
   5103 ; SSE-LABEL: test_mm_srli_epi16:
   5104 ; SSE:       # %bb.0:
   5105 ; SSE-NEXT:    psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01]
   5106 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5107 ;
   5108 ; AVX1-LABEL: test_mm_srli_epi16:
   5109 ; AVX1:       # %bb.0:
   5110 ; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01]
   5111 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5112 ;
   5113 ; AVX512-LABEL: test_mm_srli_epi16:
   5114 ; AVX512:       # %bb.0:
   5115 ; AVX512-NEXT:    vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x01]
   5116 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5117   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   5118   %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
   5119   %bc = bitcast <8 x i16> %res to <2 x i64>
   5120   ret <2 x i64> %bc
   5121 }
   5122 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
   5123 
   5124 define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
   5125 ; SSE-LABEL: test_mm_srli_epi32:
   5126 ; SSE:       # %bb.0:
   5127 ; SSE-NEXT:    psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01]
   5128 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5129 ;
   5130 ; AVX1-LABEL: test_mm_srli_epi32:
   5131 ; AVX1:       # %bb.0:
   5132 ; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01]
   5133 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5134 ;
   5135 ; AVX512-LABEL: test_mm_srli_epi32:
   5136 ; AVX512:       # %bb.0:
   5137 ; AVX512-NEXT:    vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01]
   5138 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5139   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   5140   %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
   5141   %bc = bitcast <4 x i32> %res to <2 x i64>
   5142   ret <2 x i64> %bc
   5143 }
   5144 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
   5145 
   5146 define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
   5147 ; SSE-LABEL: test_mm_srli_epi64:
   5148 ; SSE:       # %bb.0:
   5149 ; SSE-NEXT:    psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01]
   5150 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5151 ;
   5152 ; AVX1-LABEL: test_mm_srli_epi64:
   5153 ; AVX1:       # %bb.0:
   5154 ; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01]
   5155 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5156 ;
   5157 ; AVX512-LABEL: test_mm_srli_epi64:
   5158 ; AVX512:       # %bb.0:
   5159 ; AVX512-NEXT:    vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01]
   5160 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5161   %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
   5162   ret <2 x i64> %res
   5163 }
   5164 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
   5165 
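; _mm_srli_si128 is the byte shift in the other direction: indices 5..20 from the
; concatenation of %arg0 and zeroinitializer drop the low five bytes and shift five
; zeros in at the top, i.e. PSRLDQ $5.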
   5166 define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
   5167 ; SSE-LABEL: test_mm_srli_si128:
   5168 ; SSE:       # %bb.0:
   5169 ; SSE-NEXT:    psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
   5170 ; SSE-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
   5171 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5172 ;
   5173 ; AVX1-LABEL: test_mm_srli_si128:
   5174 ; AVX1:       # %bb.0:
   5175 ; AVX1-NEXT:    vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
   5176 ; AVX1-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
   5177 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5178 ;
   5179 ; AVX512-LABEL: test_mm_srli_si128:
   5180 ; AVX512:       # %bb.0:
   5181 ; AVX512-NEXT:    vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
   5182 ; AVX512-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
   5183 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5184   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   5185   %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
   5186   %bc = bitcast <16 x i8> %res to <2 x i64>
   5187   ret <2 x i64> %bc
   5188 }
   5189 
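; An aligned 16-byte store; llc favors movaps over movapd/movdqa here since the
; stores are interchangeable and movaps has the shortest encoding.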
   5190 define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
   5191 ; X86-SSE-LABEL: test_mm_store_pd:
   5192 ; X86-SSE:       # %bb.0:
   5193 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5194 ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
   5195 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5196 ;
   5197 ; X86-AVX1-LABEL: test_mm_store_pd:
   5198 ; X86-AVX1:       # %bb.0:
   5199 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5200 ; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
   5201 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5202 ;
   5203 ; X86-AVX512-LABEL: test_mm_store_pd:
   5204 ; X86-AVX512:       # %bb.0:
   5205 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5206 ; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
   5207 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5208 ;
   5209 ; X64-SSE-LABEL: test_mm_store_pd:
   5210 ; X64-SSE:       # %bb.0:
   5211 ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
   5212 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5213 ;
   5214 ; X64-AVX1-LABEL: test_mm_store_pd:
   5215 ; X64-AVX1:       # %bb.0:
   5216 ; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
   5217 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5218 ;
   5219 ; X64-AVX512-LABEL: test_mm_store_pd:
   5220 ; X64-AVX512:       # %bb.0:
   5221 ; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
   5222 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5223   %arg0 = bitcast double* %a0 to <2 x double>*
   5224   store <2 x double> %a1, <2 x double>* %arg0, align 16
   5225   ret void
   5226 }
   5227 
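; _mm_store_pd1 (identical IR to _mm_store1_pd below) splats the low lane before the
; aligned store: movlhps on SSE, vmovddup on AVX.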
   5228 define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
   5229 ; X86-SSE-LABEL: test_mm_store_pd1:
   5230 ; X86-SSE:       # %bb.0:
   5231 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5232 ; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   5233 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
   5234 ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
   5235 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5236 ;
   5237 ; X86-AVX1-LABEL: test_mm_store_pd1:
   5238 ; X86-AVX1:       # %bb.0:
   5239 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5240 ; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   5241 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   5242 ; X86-AVX1-NEXT:    vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
   5243 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5244 ;
   5245 ; X86-AVX512-LABEL: test_mm_store_pd1:
   5246 ; X86-AVX512:       # %bb.0:
   5247 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5248 ; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   5249 ; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   5250 ; X86-AVX512-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
   5251 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5252 ;
   5253 ; X64-SSE-LABEL: test_mm_store_pd1:
   5254 ; X64-SSE:       # %bb.0:
   5255 ; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   5256 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
   5257 ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
   5258 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5259 ;
   5260 ; X64-AVX1-LABEL: test_mm_store_pd1:
   5261 ; X64-AVX1:       # %bb.0:
   5262 ; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   5263 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   5264 ; X64-AVX1-NEXT:    vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
   5265 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5266 ;
   5267 ; X64-AVX512-LABEL: test_mm_store_pd1:
   5268 ; X64-AVX512:       # %bb.0:
   5269 ; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   5270 ; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   5271 ; X64-AVX512-NEXT:    vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
   5272 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5273   %arg0 = bitcast double* %a0 to <2 x double>*
   5274   %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
   5275   store <2 x double> %shuf, <2 x double>* %arg0, align 16
   5276   ret void
   5277 }
   5278 
   5279 define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
   5280 ; X86-SSE-LABEL: test_mm_store_sd:
   5281 ; X86-SSE:       # %bb.0:
   5282 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5283 ; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
   5284 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5285 ;
   5286 ; X86-AVX1-LABEL: test_mm_store_sd:
   5287 ; X86-AVX1:       # %bb.0:
   5288 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5289 ; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
   5290 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5291 ;
   5292 ; X86-AVX512-LABEL: test_mm_store_sd:
   5293 ; X86-AVX512:       # %bb.0:
   5294 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5295 ; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
   5296 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5297 ;
   5298 ; X64-SSE-LABEL: test_mm_store_sd:
   5299 ; X64-SSE:       # %bb.0:
   5300 ; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
   5301 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5302 ;
   5303 ; X64-AVX1-LABEL: test_mm_store_sd:
   5304 ; X64-AVX1:       # %bb.0:
   5305 ; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
   5306 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5307 ;
   5308 ; X64-AVX512-LABEL: test_mm_store_sd:
   5309 ; X64-AVX512:       # %bb.0:
   5310 ; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
   5311 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5312   %ext = extractelement <2 x double> %a1, i32 0
   5313   store double %ext, double* %a0, align 1
   5314   ret void
   5315 }
   5316 
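; The integer store likewise comes out as movaps - interchangeable with movdqa for a
; plain store and one byte shorter.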
   5317 define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
   5318 ; X86-SSE-LABEL: test_mm_store_si128:
   5319 ; X86-SSE:       # %bb.0:
   5320 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5321 ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
   5322 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5323 ;
   5324 ; X86-AVX1-LABEL: test_mm_store_si128:
   5325 ; X86-AVX1:       # %bb.0:
   5326 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5327 ; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
   5328 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5329 ;
   5330 ; X86-AVX512-LABEL: test_mm_store_si128:
   5331 ; X86-AVX512:       # %bb.0:
   5332 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5333 ; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
   5334 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5335 ;
   5336 ; X64-SSE-LABEL: test_mm_store_si128:
   5337 ; X64-SSE:       # %bb.0:
   5338 ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
   5339 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5340 ;
   5341 ; X64-AVX1-LABEL: test_mm_store_si128:
   5342 ; X64-AVX1:       # %bb.0:
   5343 ; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
   5344 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5345 ;
   5346 ; X64-AVX512-LABEL: test_mm_store_si128:
   5347 ; X64-AVX512:       # %bb.0:
   5348 ; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
   5349 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5350   store <2 x i64> %a1, <2 x i64>* %a0, align 16
   5351   ret void
   5352 }
   5353 
   5354 define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
   5355 ; X86-SSE-LABEL: test_mm_store1_pd:
   5356 ; X86-SSE:       # %bb.0:
   5357 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5358 ; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   5359 ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
   5360 ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
   5361 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5362 ;
   5363 ; X86-AVX1-LABEL: test_mm_store1_pd:
   5364 ; X86-AVX1:       # %bb.0:
   5365 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5366 ; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   5367 ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   5368 ; X86-AVX1-NEXT:    vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
   5369 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5370 ;
   5371 ; X86-AVX512-LABEL: test_mm_store1_pd:
   5372 ; X86-AVX512:       # %bb.0:
   5373 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5374 ; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   5375 ; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   5376 ; X86-AVX512-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
   5377 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5378 ;
   5379 ; X64-SSE-LABEL: test_mm_store1_pd:
   5380 ; X64-SSE:       # %bb.0:
   5381 ; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
   5382 ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
   5383 ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
   5384 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5385 ;
   5386 ; X64-AVX1-LABEL: test_mm_store1_pd:
   5387 ; X64-AVX1:       # %bb.0:
   5388 ; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
   5389 ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
   5390 ; X64-AVX1-NEXT:    vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
   5391 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5392 ;
   5393 ; X64-AVX512-LABEL: test_mm_store1_pd:
   5394 ; X64-AVX512:       # %bb.0:
   5395 ; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
   5396 ; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
   5397 ; X64-AVX512-NEXT:    vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
   5398 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
5399   %arg0 = bitcast double* %a0 to <2 x double>*
   5400   %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
   5401   store <2 x double> %shuf, <2 x double>* %arg0, align 16
   5402   ret void
   5403 }
   5404 
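; The high lane is moved down first (movhlps, or vpermilpd $1 on AVX) and then
; stored with a scalar movsd.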
   5405 define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
   5406 ; X86-SSE-LABEL: test_mm_storeh_sd:
   5407 ; X86-SSE:       # %bb.0:
   5408 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5409 ; X86-SSE-NEXT:    movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
   5410 ; X86-SSE-NEXT:    # xmm0 = xmm0[1,1]
   5411 ; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
   5412 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5413 ;
   5414 ; X86-AVX1-LABEL: test_mm_storeh_sd:
   5415 ; X86-AVX1:       # %bb.0:
   5416 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5417 ; X86-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5418 ; X86-AVX1-NEXT:    # xmm0 = xmm0[1,0]
   5419 ; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
   5420 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5421 ;
   5422 ; X86-AVX512-LABEL: test_mm_storeh_sd:
   5423 ; X86-AVX512:       # %bb.0:
   5424 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5425 ; X86-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5426 ; X86-AVX512-NEXT:    # xmm0 = xmm0[1,0]
   5427 ; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
   5428 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5429 ;
   5430 ; X64-SSE-LABEL: test_mm_storeh_sd:
   5431 ; X64-SSE:       # %bb.0:
   5432 ; X64-SSE-NEXT:    movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
   5433 ; X64-SSE-NEXT:    # xmm0 = xmm0[1,1]
   5434 ; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
   5435 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5436 ;
   5437 ; X64-AVX1-LABEL: test_mm_storeh_sd:
   5438 ; X64-AVX1:       # %bb.0:
   5439 ; X64-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5440 ; X64-AVX1-NEXT:    # xmm0 = xmm0[1,0]
   5441 ; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
   5442 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5443 ;
   5444 ; X64-AVX512-LABEL: test_mm_storeh_sd:
   5445 ; X64-AVX512:       # %bb.0:
   5446 ; X64-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5447 ; X64-AVX512-NEXT:    # xmm0 = xmm0[1,0]
   5448 ; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
   5449 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5450   %ext = extractelement <2 x double> %a1, i32 1
   5451   store double %ext, double* %a0, align 8
   5452   ret void
   5453 }
   5454 
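; Only the low 64 bits are stored; on x86-64 fast-isel routes the element through a
; GPR (movq) rather than storing straight from the xmm register.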
   5455 define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
   5456 ; X86-SSE-LABEL: test_mm_storel_epi64:
   5457 ; X86-SSE:       # %bb.0:
   5458 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5459 ; X86-SSE-NEXT:    movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
   5460 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5461 ;
   5462 ; X86-AVX1-LABEL: test_mm_storel_epi64:
   5463 ; X86-AVX1:       # %bb.0:
   5464 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5465 ; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
   5466 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5467 ;
   5468 ; X86-AVX512-LABEL: test_mm_storel_epi64:
   5469 ; X86-AVX512:       # %bb.0:
   5470 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5471 ; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
   5472 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5473 ;
   5474 ; X64-SSE-LABEL: test_mm_storel_epi64:
   5475 ; X64-SSE:       # %bb.0:
   5476 ; X64-SSE-NEXT:    movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
   5477 ; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
   5478 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5479 ;
   5480 ; X64-AVX1-LABEL: test_mm_storel_epi64:
   5481 ; X64-AVX1:       # %bb.0:
   5482 ; X64-AVX1-NEXT:    vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
   5483 ; X64-AVX1-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
   5484 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5485 ;
   5486 ; X64-AVX512-LABEL: test_mm_storel_epi64:
   5487 ; X64-AVX512:       # %bb.0:
   5488 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
   5489 ; X64-AVX512-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
   5490 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5491   %ext = extractelement <2 x i64> %a1, i32 0
   5492   %bc = bitcast <2 x i64> *%a0 to i64*
   5493   store i64 %ext, i64* %bc, align 8
   5494   ret void
   5495 }
   5496 
   5497 define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
   5498 ; X86-SSE-LABEL: test_mm_storel_sd:
   5499 ; X86-SSE:       # %bb.0:
   5500 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5501 ; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
   5502 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5503 ;
   5504 ; X86-AVX1-LABEL: test_mm_storel_sd:
   5505 ; X86-AVX1:       # %bb.0:
   5506 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5507 ; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
   5508 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5509 ;
   5510 ; X86-AVX512-LABEL: test_mm_storel_sd:
   5511 ; X86-AVX512:       # %bb.0:
   5512 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5513 ; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
   5514 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5515 ;
   5516 ; X64-SSE-LABEL: test_mm_storel_sd:
   5517 ; X64-SSE:       # %bb.0:
   5518 ; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
   5519 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5520 ;
   5521 ; X64-AVX1-LABEL: test_mm_storel_sd:
   5522 ; X64-AVX1:       # %bb.0:
   5523 ; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
   5524 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5525 ;
   5526 ; X64-AVX512-LABEL: test_mm_storel_sd:
   5527 ; X64-AVX512:       # %bb.0:
   5528 ; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
   5529 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5530   %ext = extractelement <2 x double> %a1, i32 0
   5531   store double %ext, double* %a0, align 8
   5532   ret void
   5533 }
   5534 
   5535 define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
   5536 ; X86-SSE-LABEL: test_mm_storer_pd:
   5537 ; X86-SSE:       # %bb.0:
   5538 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5539 ; X86-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
   5540 ; X86-SSE-NEXT:    # xmm0 = xmm0[1,0]
   5541 ; X86-SSE-NEXT:    movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00]
   5542 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5543 ;
   5544 ; X86-AVX1-LABEL: test_mm_storer_pd:
   5545 ; X86-AVX1:       # %bb.0:
   5546 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5547 ; X86-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5548 ; X86-AVX1-NEXT:    # xmm0 = xmm0[1,0]
   5549 ; X86-AVX1-NEXT:    vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
   5550 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5551 ;
   5552 ; X86-AVX512-LABEL: test_mm_storer_pd:
   5553 ; X86-AVX512:       # %bb.0:
   5554 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5555 ; X86-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5556 ; X86-AVX512-NEXT:    # xmm0 = xmm0[1,0]
   5557 ; X86-AVX512-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
   5558 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5559 ;
   5560 ; X64-SSE-LABEL: test_mm_storer_pd:
   5561 ; X64-SSE:       # %bb.0:
   5562 ; X64-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
   5563 ; X64-SSE-NEXT:    # xmm0 = xmm0[1,0]
   5564 ; X64-SSE-NEXT:    movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07]
   5565 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5566 ;
   5567 ; X64-AVX1-LABEL: test_mm_storer_pd:
   5568 ; X64-AVX1:       # %bb.0:
   5569 ; X64-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5570 ; X64-AVX1-NEXT:    # xmm0 = xmm0[1,0]
   5571 ; X64-AVX1-NEXT:    vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
   5572 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5573 ;
   5574 ; X64-AVX512-LABEL: test_mm_storer_pd:
   5575 ; X64-AVX512:       # %bb.0:
   5576 ; X64-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
   5577 ; X64-AVX512-NEXT:    # xmm0 = xmm0[1,0]
   5578 ; X64-AVX512-NEXT:    vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
   5579 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5580   %arg0 = bitcast double* %a0 to <2 x double>*
   5581   %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   5582   store <2 x double> %shuf, <2 x double>* %arg0, align 16
   5583   ret void
   5584 }
   5585 
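; The align 1 on the next two stores is what selects the unaligned movups form.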
   5586 define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
   5587 ; X86-SSE-LABEL: test_mm_storeu_pd:
   5588 ; X86-SSE:       # %bb.0:
   5589 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5590 ; X86-SSE-NEXT:    movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
   5591 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5592 ;
   5593 ; X86-AVX1-LABEL: test_mm_storeu_pd:
   5594 ; X86-AVX1:       # %bb.0:
   5595 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5596 ; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
   5597 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5598 ;
   5599 ; X86-AVX512-LABEL: test_mm_storeu_pd:
   5600 ; X86-AVX512:       # %bb.0:
   5601 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5602 ; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
   5603 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5604 ;
   5605 ; X64-SSE-LABEL: test_mm_storeu_pd:
   5606 ; X64-SSE:       # %bb.0:
   5607 ; X64-SSE-NEXT:    movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
   5608 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5609 ;
   5610 ; X64-AVX1-LABEL: test_mm_storeu_pd:
   5611 ; X64-AVX1:       # %bb.0:
   5612 ; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
   5613 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5614 ;
   5615 ; X64-AVX512-LABEL: test_mm_storeu_pd:
   5616 ; X64-AVX512:       # %bb.0:
   5617 ; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
   5618 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5619   %arg0 = bitcast double* %a0 to <2 x double>*
   5620   store <2 x double> %a1, <2 x double>* %arg0, align 1
   5621   ret void
   5622 }
   5623 
   5624 define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
   5625 ; X86-SSE-LABEL: test_mm_storeu_si128:
   5626 ; X86-SSE:       # %bb.0:
   5627 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5628 ; X86-SSE-NEXT:    movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
   5629 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5630 ;
   5631 ; X86-AVX1-LABEL: test_mm_storeu_si128:
   5632 ; X86-AVX1:       # %bb.0:
   5633 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5634 ; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
   5635 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5636 ;
   5637 ; X86-AVX512-LABEL: test_mm_storeu_si128:
   5638 ; X86-AVX512:       # %bb.0:
   5639 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5640 ; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
   5641 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5642 ;
   5643 ; X64-SSE-LABEL: test_mm_storeu_si128:
   5644 ; X64-SSE:       # %bb.0:
   5645 ; X64-SSE-NEXT:    movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
   5646 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5647 ;
   5648 ; X64-AVX1-LABEL: test_mm_storeu_si128:
   5649 ; X64-AVX1:       # %bb.0:
   5650 ; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
   5651 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5652 ;
   5653 ; X64-AVX512-LABEL: test_mm_storeu_si128:
   5654 ; X64-AVX512:       # %bb.0:
   5655 ; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
   5656 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5657   store <2 x i64> %a1, <2 x i64>* %a0, align 1
   5658   ret void
   5659 }
   5660 
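; The !nontemporal metadata turns these stores into streaming stores (movntps,
; movnti) that bypass the cache.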
   5661 define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
   5662 ; X86-SSE-LABEL: test_mm_stream_pd:
   5663 ; X86-SSE:       # %bb.0:
   5664 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5665 ; X86-SSE-NEXT:    movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
   5666 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5667 ;
   5668 ; X86-AVX1-LABEL: test_mm_stream_pd:
   5669 ; X86-AVX1:       # %bb.0:
   5670 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5671 ; X86-AVX1-NEXT:    vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
   5672 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5673 ;
   5674 ; X86-AVX512-LABEL: test_mm_stream_pd:
   5675 ; X86-AVX512:       # %bb.0:
   5676 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5677 ; X86-AVX512-NEXT:    vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
   5678 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5679 ;
   5680 ; X64-SSE-LABEL: test_mm_stream_pd:
   5681 ; X64-SSE:       # %bb.0:
   5682 ; X64-SSE-NEXT:    movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
   5683 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5684 ;
   5685 ; X64-AVX1-LABEL: test_mm_stream_pd:
   5686 ; X64-AVX1:       # %bb.0:
   5687 ; X64-AVX1-NEXT:    vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
   5688 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5689 ;
   5690 ; X64-AVX512-LABEL: test_mm_stream_pd:
   5691 ; X64-AVX512:       # %bb.0:
   5692 ; X64-AVX512-NEXT:    vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
   5693 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5694   %arg0 = bitcast double* %a0 to <2 x double>*
   5695   store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
   5696   ret void
   5697 }
   5698 
   5699 define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
   5700 ; X86-LABEL: test_mm_stream_si32:
   5701 ; X86:       # %bb.0:
   5702 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   5703 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
   5704 ; X86-NEXT:    movntil %eax, (%ecx) # encoding: [0x0f,0xc3,0x01]
   5705 ; X86-NEXT:    retl # encoding: [0xc3]
   5706 ;
   5707 ; X64-LABEL: test_mm_stream_si32:
   5708 ; X64:       # %bb.0:
   5709 ; X64-NEXT:    movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
   5710 ; X64-NEXT:    retq # encoding: [0xc3]
   5711   store i32 %a1, i32* %a0, align 1, !nontemporal !0
   5712   ret void
   5713 }
   5714 
   5715 define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
   5716 ; X86-SSE-LABEL: test_mm_stream_si128:
   5717 ; X86-SSE:       # %bb.0:
   5718 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5719 ; X86-SSE-NEXT:    movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
   5720 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
   5721 ;
   5722 ; X86-AVX1-LABEL: test_mm_stream_si128:
   5723 ; X86-AVX1:       # %bb.0:
   5724 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5725 ; X86-AVX1-NEXT:    vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
   5726 ; X86-AVX1-NEXT:    retl # encoding: [0xc3]
   5727 ;
   5728 ; X86-AVX512-LABEL: test_mm_stream_si128:
   5729 ; X86-AVX512:       # %bb.0:
   5730 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5731 ; X86-AVX512-NEXT:    vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
   5732 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
   5733 ;
   5734 ; X64-SSE-LABEL: test_mm_stream_si128:
   5735 ; X64-SSE:       # %bb.0:
   5736 ; X64-SSE-NEXT:    movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
   5737 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
   5738 ;
   5739 ; X64-AVX1-LABEL: test_mm_stream_si128:
   5740 ; X64-AVX1:       # %bb.0:
   5741 ; X64-AVX1-NEXT:    vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
   5742 ; X64-AVX1-NEXT:    retq # encoding: [0xc3]
   5743 ;
   5744 ; X64-AVX512-LABEL: test_mm_stream_si128:
   5745 ; X64-AVX512:       # %bb.0:
   5746 ; X64-AVX512-NEXT:    vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
   5747 ; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   5748   store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
   5749   ret void
   5750 }
   5751 
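; The plain wrapping subtracts need no target intrinsics; they lower directly from
; the IR sub/fsub instructions.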
   5752 define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
   5753 ; SSE-LABEL: test_mm_sub_epi8:
   5754 ; SSE:       # %bb.0:
   5755 ; SSE-NEXT:    psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1]
   5756 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5757 ;
   5758 ; AVX1-LABEL: test_mm_sub_epi8:
   5759 ; AVX1:       # %bb.0:
   5760 ; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
   5761 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5762 ;
   5763 ; AVX512-LABEL: test_mm_sub_epi8:
   5764 ; AVX512:       # %bb.0:
   5765 ; AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
   5766 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5767   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   5768   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
   5769   %res = sub <16 x i8> %arg0, %arg1
   5770   %bc = bitcast <16 x i8> %res to <2 x i64>
   5771   ret <2 x i64> %bc
   5772 }
   5773 
   5774 define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
   5775 ; SSE-LABEL: test_mm_sub_epi16:
   5776 ; SSE:       # %bb.0:
   5777 ; SSE-NEXT:    psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1]
   5778 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5779 ;
   5780 ; AVX1-LABEL: test_mm_sub_epi16:
   5781 ; AVX1:       # %bb.0:
   5782 ; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1]
   5783 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5784 ;
   5785 ; AVX512-LABEL: test_mm_sub_epi16:
   5786 ; AVX512:       # %bb.0:
   5787 ; AVX512-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
   5788 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5789   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   5790   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   5791   %res = sub <8 x i16> %arg0, %arg1
   5792   %bc = bitcast <8 x i16> %res to <2 x i64>
   5793   ret <2 x i64> %bc
   5794 }
   5795 
   5796 define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
   5797 ; SSE-LABEL: test_mm_sub_epi32:
   5798 ; SSE:       # %bb.0:
   5799 ; SSE-NEXT:    psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1]
   5800 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5801 ;
   5802 ; AVX1-LABEL: test_mm_sub_epi32:
   5803 ; AVX1:       # %bb.0:
   5804 ; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1]
   5805 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5806 ;
   5807 ; AVX512-LABEL: test_mm_sub_epi32:
   5808 ; AVX512:       # %bb.0:
   5809 ; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
   5810 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5811   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   5812   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   5813   %res = sub <4 x i32> %arg0, %arg1
   5814   %bc = bitcast <4 x i32> %res to <2 x i64>
   5815   ret <2 x i64> %bc
   5816 }
   5817 
   5818 define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
   5819 ; SSE-LABEL: test_mm_sub_epi64:
   5820 ; SSE:       # %bb.0:
   5821 ; SSE-NEXT:    psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1]
   5822 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5823 ;
   5824 ; AVX1-LABEL: test_mm_sub_epi64:
   5825 ; AVX1:       # %bb.0:
   5826 ; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
   5827 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5828 ;
   5829 ; AVX512-LABEL: test_mm_sub_epi64:
   5830 ; AVX512:       # %bb.0:
   5831 ; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
   5832 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5833   %res = sub <2 x i64> %a0, %a1
   5834   ret <2 x i64> %res
   5835 }
   5836 
   5837 define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
   5838 ; SSE-LABEL: test_mm_sub_pd:
   5839 ; SSE:       # %bb.0:
   5840 ; SSE-NEXT:    subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1]
   5841 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5842 ;
   5843 ; AVX1-LABEL: test_mm_sub_pd:
   5844 ; AVX1:       # %bb.0:
   5845 ; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1]
   5846 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5847 ;
   5848 ; AVX512-LABEL: test_mm_sub_pd:
   5849 ; AVX512:       # %bb.0:
   5850 ; AVX512-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1]
   5851 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5852   %res = fsub <2 x double> %a0, %a1
   5853   ret <2 x double> %res
   5854 }
   5855 
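; Scalar subtract: lane 0 of each operand is extracted, subtracted, and reinserted
; into %a0, matching subsd's behavior of leaving the upper lane of %a0 intact.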
   5856 define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
   5857 ; SSE-LABEL: test_mm_sub_sd:
   5858 ; SSE:       # %bb.0:
   5859 ; SSE-NEXT:    subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1]
   5860 ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5861 ;
   5862 ; AVX1-LABEL: test_mm_sub_sd:
   5863 ; AVX1:       # %bb.0:
   5864 ; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1]
   5865 ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5866 ;
   5867 ; AVX512-LABEL: test_mm_sub_sd:
   5868 ; AVX512:       # %bb.0:
   5869 ; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
   5870 ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5871   %ext0 = extractelement <2 x double> %a0, i32 0
   5872   %ext1 = extractelement <2 x double> %a1, i32 0
   5873   %fsub = fsub double %ext0, %ext1
   5874   %res = insertelement <2 x double> %a0, double %fsub, i32 0
   5875   ret <2 x double> %res
   5876 }
   5877 
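; The saturating subtracts still go through target intrinsics: psubs.b/psubs.w
; saturate as signed, psubus.b/psubus.w clamp at zero as unsigned.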
   5878 define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
   5879 ; SSE-LABEL: test_mm_subs_epi8:
   5880 ; SSE:       # %bb.0:
   5881 ; SSE-NEXT:    psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

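; NOTE: The ucomi*_sd tests below exercise the unordered-compare flag
; handling: ucomisd sets ZF, PF and CF, with PF=1 only when an operand is
; NaN. EQ therefore combines sete with setnp (andb) so that an unordered
; comparison returns 0 rather than 1.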
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomieq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomieq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomige_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomige_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomigt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomigt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

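; NOTE: ucomisd sets CF (and ZF) on an unordered result, so lowering le/lt
; with setbe/setb would wrongly return 1 for NaN inputs. The le/lt tests
; therefore expect the operand order to be swapped so the CF-based setae/seta
; can be reused, which yields 0 on unordered inputs.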
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomile_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomile_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomilt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomilt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

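; NOTE: NEQ is the dual of EQ: an unordered comparison must compare
; not-equal, so setne is OR'd with setp (PF=1 on NaN) rather than AND'ed
; with setnp as in the ucomieq test above.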
define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomineq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomineq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

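; NOTE: The _mm_undefined_* intrinsics lower to a bare IR undef, so no
; instructions other than the return are expected for any target.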
define <2 x double> @test_mm_undefined_pd() {
; CHECK-LABEL: test_mm_undefined_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x double> undef
}

define <2 x i64> @test_mm_undefined_si128() {
; CHECK-LABEL: test_mm_undefined_si128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x i64> undef
}

define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

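; NOTE: For the dword and qword unpacks the checks expect the equivalent
; floating-point-domain instructions (unpckhps/unpckhpd rather than
; punpckhdq/punpckhqdq). The shuffle semantics are identical; presumably
; the FP forms are preferred by instruction selection, and the ps form is
; also one byte shorter (no 0x66 prefix).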
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpackhi_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

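; NOTE: The low-qword interleave below is expected to lower to movlhps
; (3-byte SSE1 encoding) instead of punpcklqdq (4 bytes with the 0x66
; prefix); both produce xmm0 = xmm0[0],xmm1[0].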
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpacklo_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}

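; NOTE: Both XOR tests expect xorps regardless of the operand type: it is
; bitwise-identical to xorpd/pxor on 128-bit values and one byte shorter
; (no 0x66 prefix).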
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_xor_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_xor_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

!0 = !{i32 1}