; (code-browser navigation header, commented out so the file parses: Home | History | Annotate | Download | only in X86)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
      3 ; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
      4 ; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
      5 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
      6 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
      7 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
      8 
      9 ; This test works just like the non-upgrade one except that it only checks
     10 ; forms which require auto-upgrading.
     11 
; Legacy @llvm.x86.sse41.blendpd form (imm 6); auto-upgraded, then lowered to an immediate float-blend. Assertions are autogenerated.
     12 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
     13 ; SSE-LABEL: test_x86_sse41_blendpd:
     14 ; SSE:       ## %bb.0:
     15 ; SSE-NEXT:    blendps $12, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x0c]
     16 ; SSE-NEXT:    ## xmm0 = xmm0[0,1],xmm1[2,3]
     17 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     18 ;
     19 ; AVX-LABEL: test_x86_sse41_blendpd:
     20 ; AVX:       ## %bb.0:
     21 ; AVX-NEXT:    vblendps $3, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
     22 ; AVX-NEXT:    ## xmm0 = xmm0[0,1],xmm1[2,3]
     23 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     24   %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 6) ; <<2 x double>> [#uses=1]
     25   ret <2 x double> %res
     26 }
     27 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
     28 
     29 
; Legacy @llvm.x86.sse41.blendps form (imm 7); auto-upgraded, then lowered to an immediate blend. Assertions are autogenerated.
     30 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
     31 ; SSE-LABEL: test_x86_sse41_blendps:
     32 ; SSE:       ## %bb.0:
     33 ; SSE-NEXT:    blendps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x07]
     34 ; SSE-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
     35 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     36 ;
     37 ; AVX-LABEL: test_x86_sse41_blendps:
     38 ; AVX:       ## %bb.0:
     39 ; AVX-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
     40 ; AVX-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
     41 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     42   %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
     43   ret <4 x float> %res
     44 }
     45 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
     46 
     47 
; Legacy @llvm.x86.sse41.dppd form; expected to select (v)dppd with the immediate passed through. Assertions are autogenerated.
     48 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
     49 ; SSE-LABEL: test_x86_sse41_dppd:
     50 ; SSE:       ## %bb.0:
     51 ; SSE-NEXT:    dppd $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x41,0xc1,0x07]
     52 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     53 ;
     54 ; AVX-LABEL: test_x86_sse41_dppd:
     55 ; AVX:       ## %bb.0:
     56 ; AVX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x41,0xc1,0x07]
     57 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     58   %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
     59   ret <2 x double> %res
     60 }
     61 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
     62 
     63 
; Legacy @llvm.x86.sse41.dpps form; expected to select (v)dpps with the immediate passed through. Assertions are autogenerated.
     64 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
     65 ; SSE-LABEL: test_x86_sse41_dpps:
     66 ; SSE:       ## %bb.0:
     67 ; SSE-NEXT:    dpps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x40,0xc1,0x07]
     68 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     69 ;
     70 ; AVX-LABEL: test_x86_sse41_dpps:
     71 ; AVX:       ## %bb.0:
     72 ; AVX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x40,0xc1,0x07]
     73 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     74   %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
     75   ret <4 x float> %res
     76 }
     77 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
     78 
     79 
; Legacy @llvm.x86.sse41.insertps form (imm 17); note the avx512 run compresses EVEX to VEX. Assertions are autogenerated.
     80 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
     81 ; SSE-LABEL: test_x86_sse41_insertps:
     82 ; SSE:       ## %bb.0:
     83 ; SSE-NEXT:    insertps $17, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc1,0x11]
     84 ; SSE-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
     85 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     86 ;
     87 ; AVX1-LABEL: test_x86_sse41_insertps:
     88 ; AVX1:       ## %bb.0:
     89 ; AVX1-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
     90 ; AVX1-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
     91 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     92 ;
     93 ; AVX512-LABEL: test_x86_sse41_insertps:
     94 ; AVX512:       ## %bb.0:
     95 ; AVX512-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
     96 ; AVX512-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
     97 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
     98   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 17) ; <<4 x float>> [#uses=1]
     99   ret <4 x float> %res
    100 }
    101 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
    102 
    103 
; Legacy @llvm.x86.sse41.movntdqa with an i8* operand (hence the bitcast); checks the non-temporal load on both 32- and 64-bit triples. Assertions are autogenerated.
    104 define <2 x i64> @test_x86_sse41_movntdqa(<2 x i64>* %a0) {
    105 ; X86-SSE-LABEL: test_x86_sse41_movntdqa:
    106 ; X86-SSE:       ## %bb.0:
    107 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
    108 ; X86-SSE-NEXT:    movntdqa (%eax), %xmm0 ## encoding: [0x66,0x0f,0x38,0x2a,0x00]
    109 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
    110 ;
    111 ; X86-AVX1-LABEL: test_x86_sse41_movntdqa:
    112 ; X86-AVX1:       ## %bb.0:
    113 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
    114 ; X86-AVX1-NEXT:    vmovntdqa (%eax), %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2a,0x00]
    115 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
    116 ;
    117 ; X86-AVX512-LABEL: test_x86_sse41_movntdqa:
    118 ; X86-AVX512:       ## %bb.0:
    119 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
    120 ; X86-AVX512-NEXT:    vmovntdqa (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2a,0x00]
    121 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
    122 ;
    123 ; X64-SSE-LABEL: test_x86_sse41_movntdqa:
    124 ; X64-SSE:       ## %bb.0:
    125 ; X64-SSE-NEXT:    movntdqa (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x38,0x2a,0x07]
    126 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
    127 ;
    128 ; X64-AVX1-LABEL: test_x86_sse41_movntdqa:
    129 ; X64-AVX1:       ## %bb.0:
    130 ; X64-AVX1-NEXT:    vmovntdqa (%rdi), %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2a,0x07]
    131 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
    132 ;
    133 ; X64-AVX512-LABEL: test_x86_sse41_movntdqa:
    134 ; X64-AVX512:       ## %bb.0:
    135 ; X64-AVX512-NEXT:    vmovntdqa (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2a,0x07]
    136 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
    137   %arg0 = bitcast <2 x i64>* %a0 to i8*
    138   %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %arg0)
    139   ret <2 x i64> %res
    140 }
    141 declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone
    142 
    143 
; Legacy @llvm.x86.sse41.mpsadbw form (i32 immediate); expected to select (v)mpsadbw. Assertions are autogenerated.
    144 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
    145 ; SSE-LABEL: test_x86_sse41_mpsadbw:
    146 ; SSE:       ## %bb.0:
    147 ; SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0xc1,0x07]
    148 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    149 ;
    150 ; AVX-LABEL: test_x86_sse41_mpsadbw:
    151 ; AVX:       ## %bb.0:
    152 ; AVX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0xc1,0x07]
    153 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    154   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
    155   ret <8 x i16> %res
    156 }
    157 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
    158 
    159 
; Legacy @llvm.x86.sse41.pblendw form (imm 7); auto-upgraded, then lowered to an immediate word-blend. Assertions are autogenerated.
    160 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
    161 ; SSE-LABEL: test_x86_sse41_pblendw:
    162 ; SSE:       ## %bb.0:
    163 ; SSE-NEXT:    pblendw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc1,0x07]
    164 ; SSE-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
    165 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    166 ;
    167 ; AVX-LABEL: test_x86_sse41_pblendw:
    168 ; AVX:       ## %bb.0:
    169 ; AVX-NEXT:    vpblendw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
    170 ; AVX-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
    171 ; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    172   %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
    173   ret <8 x i16> %res
    174 }
    175 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
    176 
    177 
; Legacy @llvm.x86.sse41.pmovsxbd form; expected to lower to (v)pmovsxbd. Assertions are autogenerated.
    178 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
    179 ; SSE-LABEL: test_x86_sse41_pmovsxbd:
    180 ; SSE:       ## %bb.0:
    181 ; SSE-NEXT:    pmovsxbd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x21,0xc0]
    182 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    183 ;
    184 ; AVX1-LABEL: test_x86_sse41_pmovsxbd:
    185 ; AVX1:       ## %bb.0:
    186 ; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x21,0xc0]
    187 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    188 ;
    189 ; AVX512-LABEL: test_x86_sse41_pmovsxbd:
    190 ; AVX512:       ## %bb.0:
    191 ; AVX512-NEXT:    vpmovsxbd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
    192 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    193   %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
    194   ret <4 x i32> %res
    195 }
    196 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
    197 
    198 
; Legacy @llvm.x86.sse41.pmovsxbq form; expected to lower to (v)pmovsxbq. Assertions are autogenerated.
    199 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
    200 ; SSE-LABEL: test_x86_sse41_pmovsxbq:
    201 ; SSE:       ## %bb.0:
    202 ; SSE-NEXT:    pmovsxbq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x22,0xc0]
    203 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    204 ;
    205 ; AVX1-LABEL: test_x86_sse41_pmovsxbq:
    206 ; AVX1:       ## %bb.0:
    207 ; AVX1-NEXT:    vpmovsxbq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x22,0xc0]
    208 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    209 ;
    210 ; AVX512-LABEL: test_x86_sse41_pmovsxbq:
    211 ; AVX512:       ## %bb.0:
    212 ; AVX512-NEXT:    vpmovsxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
    213 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    214   %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
    215   ret <2 x i64> %res
    216 }
    217 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
    218 
    219 
; Legacy @llvm.x86.sse41.pmovsxbw form; expected to lower to (v)pmovsxbw. Assertions are autogenerated.
    220 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
    221 ; SSE-LABEL: test_x86_sse41_pmovsxbw:
    222 ; SSE:       ## %bb.0:
    223 ; SSE-NEXT:    pmovsxbw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x20,0xc0]
    224 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    225 ;
    226 ; AVX1-LABEL: test_x86_sse41_pmovsxbw:
    227 ; AVX1:       ## %bb.0:
    228 ; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x20,0xc0]
    229 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    230 ;
    231 ; AVX512-LABEL: test_x86_sse41_pmovsxbw:
    232 ; AVX512:       ## %bb.0:
    233 ; AVX512-NEXT:    vpmovsxbw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
    234 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    235   %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
    236   ret <8 x i16> %res
    237 }
    238 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
    239 
    240 
; Legacy @llvm.x86.sse41.pmovsxdq form; expected to lower to (v)pmovsxdq. Assertions are autogenerated.
    241 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
    242 ; SSE-LABEL: test_x86_sse41_pmovsxdq:
    243 ; SSE:       ## %bb.0:
    244 ; SSE-NEXT:    pmovsxdq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x25,0xc0]
    245 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    246 ;
    247 ; AVX1-LABEL: test_x86_sse41_pmovsxdq:
    248 ; AVX1:       ## %bb.0:
    249 ; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x25,0xc0]
    250 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    251 ;
    252 ; AVX512-LABEL: test_x86_sse41_pmovsxdq:
    253 ; AVX512:       ## %bb.0:
    254 ; AVX512-NEXT:    vpmovsxdq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
    255 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    256   %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
    257   ret <2 x i64> %res
    258 }
    259 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
    260 
    261 
; Legacy @llvm.x86.sse41.pmovsxwd form; expected to lower to (v)pmovsxwd. Assertions are autogenerated.
    262 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
    263 ; SSE-LABEL: test_x86_sse41_pmovsxwd:
    264 ; SSE:       ## %bb.0:
    265 ; SSE-NEXT:    pmovsxwd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x23,0xc0]
    266 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    267 ;
    268 ; AVX1-LABEL: test_x86_sse41_pmovsxwd:
    269 ; AVX1:       ## %bb.0:
    270 ; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
    271 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    272 ;
    273 ; AVX512-LABEL: test_x86_sse41_pmovsxwd:
    274 ; AVX512:       ## %bb.0:
    275 ; AVX512-NEXT:    vpmovsxwd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
    276 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    277   %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
    278   ret <4 x i32> %res
    279 }
    280 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
    281 
    282 
; Legacy @llvm.x86.sse41.pmovsxwq form; expected to lower to (v)pmovsxwq. Assertions are autogenerated.
    283 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
    284 ; SSE-LABEL: test_x86_sse41_pmovsxwq:
    285 ; SSE:       ## %bb.0:
    286 ; SSE-NEXT:    pmovsxwq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x24,0xc0]
    287 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    288 ;
    289 ; AVX1-LABEL: test_x86_sse41_pmovsxwq:
    290 ; AVX1:       ## %bb.0:
    291 ; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x24,0xc0]
    292 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    293 ;
    294 ; AVX512-LABEL: test_x86_sse41_pmovsxwq:
    295 ; AVX512:       ## %bb.0:
    296 ; AVX512-NEXT:    vpmovsxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
    297 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    298   %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
    299   ret <2 x i64> %res
    300 }
    301 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
    302 
    303 
; Legacy @llvm.x86.sse41.pmovzxbd form; auto-upgraded to a zero-extending shuffle, selected back to (v)pmovzxbd. Assertions are autogenerated.
    304 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
    305 ; SSE-LABEL: test_x86_sse41_pmovzxbd:
    306 ; SSE:       ## %bb.0:
    307 ; SSE-NEXT:    pmovzxbd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x31,0xc0]
    308 ; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    309 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    310 ;
    311 ; AVX1-LABEL: test_x86_sse41_pmovzxbd:
    312 ; AVX1:       ## %bb.0:
    313 ; AVX1-NEXT:    vpmovzxbd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x31,0xc0]
    314 ; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    315 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    316 ;
    317 ; AVX512-LABEL: test_x86_sse41_pmovzxbd:
    318 ; AVX512:       ## %bb.0:
    319 ; AVX512-NEXT:    vpmovzxbd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
    320 ; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    321 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    322   %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
    323   ret <4 x i32> %res
    324 }
    325 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
    326 
    327 
; Legacy @llvm.x86.sse41.pmovzxbq form; auto-upgraded to a zero-extending shuffle, selected back to (v)pmovzxbq. Assertions are autogenerated.
    328 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
    329 ; SSE-LABEL: test_x86_sse41_pmovzxbq:
    330 ; SSE:       ## %bb.0:
    331 ; SSE-NEXT:    pmovzxbq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x32,0xc0]
    332 ; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    333 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    334 ;
    335 ; AVX1-LABEL: test_x86_sse41_pmovzxbq:
    336 ; AVX1:       ## %bb.0:
    337 ; AVX1-NEXT:    vpmovzxbq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x32,0xc0]
    338 ; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    339 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    340 ;
    341 ; AVX512-LABEL: test_x86_sse41_pmovzxbq:
    342 ; AVX512:       ## %bb.0:
    343 ; AVX512-NEXT:    vpmovzxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
    344 ; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    345 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    346   %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
    347   ret <2 x i64> %res
    348 }
    349 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
    350 
    351 
; Legacy @llvm.x86.sse41.pmovzxbw form; auto-upgraded to a zero-extending shuffle, selected back to (v)pmovzxbw. Assertions are autogenerated.
    352 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
    353 ; SSE-LABEL: test_x86_sse41_pmovzxbw:
    354 ; SSE:       ## %bb.0:
    355 ; SSE-NEXT:    pmovzxbw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x30,0xc0]
    356 ; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    357 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    358 ;
    359 ; AVX1-LABEL: test_x86_sse41_pmovzxbw:
    360 ; AVX1:       ## %bb.0:
    361 ; AVX1-NEXT:    vpmovzxbw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x30,0xc0]
    362 ; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    363 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    364 ;
    365 ; AVX512-LABEL: test_x86_sse41_pmovzxbw:
    366 ; AVX512:       ## %bb.0:
    367 ; AVX512-NEXT:    vpmovzxbw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
    368 ; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    369 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    370   %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
    371   ret <8 x i16> %res
    372 }
    373 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
    374 
    375 
; Legacy @llvm.x86.sse41.pmovzxdq form; auto-upgraded to a zero-extending shuffle, selected back to (v)pmovzxdq. Assertions are autogenerated.
    376 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
    377 ; SSE-LABEL: test_x86_sse41_pmovzxdq:
    378 ; SSE:       ## %bb.0:
    379 ; SSE-NEXT:    pmovzxdq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x35,0xc0]
    380 ; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
    381 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    382 ;
    383 ; AVX1-LABEL: test_x86_sse41_pmovzxdq:
    384 ; AVX1:       ## %bb.0:
    385 ; AVX1-NEXT:    vpmovzxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x35,0xc0]
    386 ; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
    387 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    388 ;
    389 ; AVX512-LABEL: test_x86_sse41_pmovzxdq:
    390 ; AVX512:       ## %bb.0:
    391 ; AVX512-NEXT:    vpmovzxdq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
    392 ; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
    393 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    394   %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
    395   ret <2 x i64> %res
    396 }
    397 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
    398 
    399 
; Legacy @llvm.x86.sse41.pmovzxwd form; auto-upgraded to a zero-extending shuffle, selected back to (v)pmovzxwd. Assertions are autogenerated.
    400 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
    401 ; SSE-LABEL: test_x86_sse41_pmovzxwd:
    402 ; SSE:       ## %bb.0:
    403 ; SSE-NEXT:    pmovzxwd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x33,0xc0]
    404 ; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    405 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    406 ;
    407 ; AVX1-LABEL: test_x86_sse41_pmovzxwd:
    408 ; AVX1:       ## %bb.0:
    409 ; AVX1-NEXT:    vpmovzxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x33,0xc0]
    410 ; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    411 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    412 ;
    413 ; AVX512-LABEL: test_x86_sse41_pmovzxwd:
    414 ; AVX512:       ## %bb.0:
    415 ; AVX512-NEXT:    vpmovzxwd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
    416 ; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    417 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    418   %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
    419   ret <4 x i32> %res
    420 }
    421 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
    422 
    423 
; Legacy @llvm.x86.sse41.pmovzxwq form; auto-upgraded to a zero-extending shuffle, selected back to (v)pmovzxwq. Assertions are autogenerated.
    424 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
    425 ; SSE-LABEL: test_x86_sse41_pmovzxwq:
    426 ; SSE:       ## %bb.0:
    427 ; SSE-NEXT:    pmovzxwq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x34,0xc0]
    428 ; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    429 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    430 ;
    431 ; AVX1-LABEL: test_x86_sse41_pmovzxwq:
    432 ; AVX1:       ## %bb.0:
    433 ; AVX1-NEXT:    vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
    434 ; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    435 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    436 ;
    437 ; AVX512-LABEL: test_x86_sse41_pmovzxwq:
    438 ; AVX512:       ## %bb.0:
    439 ; AVX512-NEXT:    vpmovzxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
    440 ; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    441 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    442   %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
    443   ret <2 x i64> %res
    444 }
    445 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
    446 
; Legacy @llvm.x86.sse41.pmaxsb form; expected to lower to (v)pmaxsb. Assertions are autogenerated.
    447 define <16 x i8> @max_epi8(<16 x i8> %a0, <16 x i8> %a1) {
    448 ; SSE-LABEL: max_epi8:
    449 ; SSE:       ## %bb.0:
    450 ; SSE-NEXT:    pmaxsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3c,0xc1]
    451 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    452 ;
    453 ; AVX1-LABEL: max_epi8:
    454 ; AVX1:       ## %bb.0:
    455 ; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
    456 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    457 ;
    458 ; AVX512-LABEL: max_epi8:
    459 ; AVX512:       ## %bb.0:
    460 ; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
    461 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    462   %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
    463   ret <16 x i8> %res
    464 }
    465 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
    466 
; Legacy @llvm.x86.sse41.pminsb form; expected to lower to (v)pminsb. Assertions are autogenerated.
    467 define <16 x i8> @min_epi8(<16 x i8> %a0, <16 x i8> %a1) {
    468 ; SSE-LABEL: min_epi8:
    469 ; SSE:       ## %bb.0:
    470 ; SSE-NEXT:    pminsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x38,0xc1]
    471 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    472 ;
    473 ; AVX1-LABEL: min_epi8:
    474 ; AVX1:       ## %bb.0:
    475 ; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x38,0xc1]
    476 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    477 ;
    478 ; AVX512-LABEL: min_epi8:
    479 ; AVX512:       ## %bb.0:
    480 ; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1]
    481 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    482   %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
    483   ret <16 x i8> %res
    484 }
    485 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
    486 
; Legacy @llvm.x86.sse41.pmaxuw form; expected to lower to (v)pmaxuw. Assertions are autogenerated.
    487 define <8 x i16> @max_epu16(<8 x i16> %a0, <8 x i16> %a1) {
    488 ; SSE-LABEL: max_epu16:
    489 ; SSE:       ## %bb.0:
    490 ; SSE-NEXT:    pmaxuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3e,0xc1]
    491 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    492 ;
    493 ; AVX1-LABEL: max_epu16:
    494 ; AVX1:       ## %bb.0:
    495 ; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
    496 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    497 ;
    498 ; AVX512-LABEL: max_epu16:
    499 ; AVX512:       ## %bb.0:
    500 ; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
    501 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    502   %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
    503   ret <8 x i16> %res
    504 }
    505 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
    506 
; Legacy @llvm.x86.sse41.pminuw form; expected to lower to (v)pminuw. Assertions are autogenerated.
    507 define <8 x i16> @min_epu16(<8 x i16> %a0, <8 x i16> %a1) {
    508 ; SSE-LABEL: min_epu16:
    509 ; SSE:       ## %bb.0:
    510 ; SSE-NEXT:    pminuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3a,0xc1]
    511 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    512 ;
    513 ; AVX1-LABEL: min_epu16:
    514 ; AVX1:       ## %bb.0:
    515 ; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
    516 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    517 ;
    518 ; AVX512-LABEL: min_epu16:
    519 ; AVX512:       ## %bb.0:
    520 ; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
    521 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    522   %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
    523   ret <8 x i16> %res
    524 }
    525 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
    526 
; Legacy @llvm.x86.sse41.pmaxsd form; expected to lower to (v)pmaxsd. Assertions are autogenerated.
    527 define <4 x i32> @max_epi32(<4 x i32> %a0, <4 x i32> %a1) {
    528 ; SSE-LABEL: max_epi32:
    529 ; SSE:       ## %bb.0:
    530 ; SSE-NEXT:    pmaxsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3d,0xc1]
    531 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    532 ;
    533 ; AVX1-LABEL: max_epi32:
    534 ; AVX1:       ## %bb.0:
    535 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
    536 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    537 ;
    538 ; AVX512-LABEL: max_epi32:
    539 ; AVX512:       ## %bb.0:
    540 ; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
    541 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    542   %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
    543   ret <4 x i32> %res
    544 }
    545 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
    546 
; Legacy @llvm.x86.sse41.pminsd form; expected to lower to (v)pminsd. Assertions are autogenerated.
    547 define <4 x i32> @min_epi32(<4 x i32> %a0, <4 x i32> %a1) {
    548 ; SSE-LABEL: min_epi32:
    549 ; SSE:       ## %bb.0:
    550 ; SSE-NEXT:    pminsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x39,0xc1]
    551 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    552 ;
    553 ; AVX1-LABEL: min_epi32:
    554 ; AVX1:       ## %bb.0:
    555 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x39,0xc1]
    556 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    557 ;
    558 ; AVX512-LABEL: min_epi32:
    559 ; AVX512:       ## %bb.0:
    560 ; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1]
    561 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    562   %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
    563   ret <4 x i32> %res
    564 }
    565 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
    566 
; Legacy @llvm.x86.sse41.pmaxud form; expected to lower to (v)pmaxud. Assertions are autogenerated.
    567 define <4 x i32> @max_epu32(<4 x i32> %a0, <4 x i32> %a1) {
    568 ; SSE-LABEL: max_epu32:
    569 ; SSE:       ## %bb.0:
    570 ; SSE-NEXT:    pmaxud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3f,0xc1]
    571 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    572 ;
    573 ; AVX1-LABEL: max_epu32:
    574 ; AVX1:       ## %bb.0:
    575 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
    576 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    577 ;
    578 ; AVX512-LABEL: max_epu32:
    579 ; AVX512:       ## %bb.0:
    580 ; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
    581 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    582   %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
    583   ret <4 x i32> %res
    584 }
    585 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
    586 
; Legacy @llvm.x86.sse41.pminud form; expected to lower to (v)pminud. Assertions are autogenerated.
    587 define <4 x i32> @min_epu32(<4 x i32> %a0, <4 x i32> %a1) {
    588 ; SSE-LABEL: min_epu32:
    589 ; SSE:       ## %bb.0:
    590 ; SSE-NEXT:    pminud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3b,0xc1]
    591 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    592 ;
    593 ; AVX1-LABEL: min_epu32:
    594 ; AVX1:       ## %bb.0:
    595 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
    596 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    597 ;
    598 ; AVX512-LABEL: min_epu32:
    599 ; AVX512:       ## %bb.0:
    600 ; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
    601 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    602   %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
    603   ret <4 x i32> %res
    604 }
    605 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
    606 
    607 
; Legacy @llvm.x86.sse41.pmuldq form; expected to lower to (v)pmuldq. Assertions are autogenerated.
    608 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
    609 ; SSE-LABEL: test_x86_sse41_pmuldq:
    610 ; SSE:       ## %bb.0:
    611 ; SSE-NEXT:    pmuldq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x28,0xc1]
    612 ; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    613 ;
    614 ; AVX1-LABEL: test_x86_sse41_pmuldq:
    615 ; AVX1:       ## %bb.0:
    616 ; AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x28,0xc1]
    617 ; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    618 ;
    619 ; AVX512-LABEL: test_x86_sse41_pmuldq:
    620 ; AVX512:       ## %bb.0:
    621 ; AVX512-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
    622 ; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    623   %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
    624   ret <2 x i64> %res
    625 }
    626 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
    627