; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s

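; This file checks the lowering of the llvm.masked.gather.* intrinsics on
; x86. With +avx2, the X86 (i386) and X64 (x86-64) runs expect hardware
; VPGATHER*/VGATHER* instructions, and the skx run with -avx512f must fall
; back to the same AVX2 code as the X64 run. Without AVX2 (NOGATHER) the
; gather is scalarized: each lane tests its mask bit, conditionally loads,
; and the loaded lanes are blended with the passthru operand at the end.
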
declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)

define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB0_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}

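; Same gather as above, but the <2 x i32> result is widened to <4 x i32> by a
; shufflevector. A single gather should still be emitted, and the widened
; result can be used directly (vmovdqa) instead of being zero-extended back
; to two 64-bit lanes (the vpmovzxdq in the test above).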
define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB1_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}

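; The same 2-lane patterns with float elements: AVX2 uses vgatherdps on i386
; and vgatherqps on x86-64, while NOGATHER assembles the result with
; vmovss/vinsertps before the final blend.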
declare <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)

define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB2_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}

define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB3_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}


declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)

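; In the 4-lane tests the pointer vector is a direct argument instead of
; being loaded from memory. On x86-64 the four 64-bit pointers occupy a ymm
; register, so the X64 run gathers with vpgatherqd/vgatherqps and must issue
; vzeroupper before returning.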
define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}

declare <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)

define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; NOGATHER-NEXT:  .LBB5_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}

declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)

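; For the 8-lane tests, eight 64-bit pointers no longer fit in one register
; on x86-64: the X64 run loads two ymm pointer vectors, performs two
; vpgatherqd/vgatherqps gathers, and recombines the halves with vinserti128/
; vinsertf128. The <8 x i1> mask is sign-extended first (vpslld + vpsrad) so
; each gather sees a full-width condition.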
define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT:    vmovq %xmm5, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT:    vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x i32*>, <8 x i32*>* %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}

declare <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)

define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm5 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT:    vmovq %xmm5, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm5 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],mem[0],xmm4[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],mem[0],xmm4[3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT:    vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x float*>, <8 x float*>* %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}

declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)

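; Gathers of 64-bit elements. On i386 four 32-bit pointers fit in an xmm, so
; vpgatherdq/vgatherdpd is used; on x86-64 the pointers are 64-bit and
; vpgatherqq/vgatherqpd is used. The <4 x i1> mask is sign-extended to
; 64-bit lanes via vpmovsxdq.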
define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrq $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB8_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x i64*>, <4 x i64*>* %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}

declare <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)

define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm4 = xmm2[0],mem[0]
; NOGATHER-NEXT:    vblendpd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vmovlpd {{.*#+}} xmm4 = mem[0],xmm4[1]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB9_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x double*>, <4 x double*>* %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)

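; In the 2-lane qword tests, the i386 run loads the two 32-bit pointers and
; sign-extends them to 64 bits with vpmovsxdq so that vpgatherqq/vgatherqpd
; can consume them as qword indices.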
define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
; X86-LABEL: masked_gather_v2i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %xmm2
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB10_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB10_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB10_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB10_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i64*>, <2 x i64*>* %ptr
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
  ret <2 x i64> %res
}

declare <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)

define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks, <2 x double> %passthro) {
; X86-LABEL: masked_gather_v2double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %xmm2
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovapd (%rdi), %xmm2
; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB11_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB11_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB11_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
; NOGATHER-NEXT:  .LBB11_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
  ret <2 x double> %res
}