Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
      4 
      ; Mask <4..7, 0..3> swaps the two 128-bit halves of %a (%b is unused).
      ; Both RUN configurations expect a single vperm2f128.
      5 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
      6 ; ALL-LABEL: A:
      7 ; ALL:       ## BB#0: ## %entry
      8 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
      9 ; ALL-NEXT:    retq
     10 entry:
     11   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
     12   ret <8 x float> %shuffle
     13 }
     14 
     ; Mask <0..3, 12..15> keeps the low half of %a and takes the high half of %b.
     ; No half changes position, so the expected code is a vblendpd.
     15 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
     16 ; ALL-LABEL: B:
     17 ; ALL:       ## BB#0: ## %entry
     18 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
     19 ; ALL-NEXT:    retq
     20 entry:
     21   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
     22   ret <8 x float> %shuffle
     23 }
     24 
     ; Mask <0..3, 0..3> duplicates the low half of %a into both halves of the
     ; result. The expected code is a vinsertf128 of xmm0 into the upper half.
     25 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
     26 ; ALL-LABEL: C:
     27 ; ALL:       ## BB#0: ## %entry
     28 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     29 ; ALL-NEXT:    retq
     30 entry:
     31   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
     32   ret <8 x float> %shuffle
     33 }
     34 
     ; Mask <4..7, 4..7> duplicates the high half of %a into both halves of the
     ; result. The expected code is a single vperm2f128.
     35 define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
     36 ; ALL-LABEL: D:
     37 ; ALL:       ## BB#0: ## %entry
     38 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
     39 ; ALL-NEXT:    retq
     40 entry:
     41   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
     42   ret <8 x float> %shuffle
     43 }
     44 
     ; Same high-half duplication as @D, but on <32 x i8>: mask <16..31, 16..31>.
     ; With no integer arithmetic involved, both configurations expect the
     ; floating-point form vperm2f128.
     45 define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
     46 ; ALL-LABEL: E:
     47 ; ALL:       ## BB#0: ## %entry
     48 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
     49 ; ALL-NEXT:    retq
     50 entry:
     51   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
     52   ret <32 x i8> %shuffle
     53 }
     54 
     ; Mask <6, 7, 0, 1> on <4 x i64>: the result's low half is the high half of
     ; %b and its high half is the low half of %a. The expected code is a single
     ; two-source vperm2f128.
     55 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
     56 ; ALL-LABEL: E2:
     57 ; ALL:       ## BB#0: ## %entry
     58 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
     59 ; ALL-NEXT:    retq
     60 entry:
     61   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
     62   ret <4 x i64> %shuffle
     63 }
     64 
     ; Integer-domain variant of @E: %a is first incremented by an integer add,
     ; then its high half is duplicated into both halves (mask <16..31, 16..31>).
     ; The AVX2 checks expect a 256-bit vpaddb followed by vperm2i128. The AVX1
     ; checks expect the add done on the extracted high 128 bits, then vperm2f128.
     65 define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
     66 ; AVX1-LABEL: Ei:
     67 ; AVX1:       ## BB#0: ## %entry
     68 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
     69 ; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
     70 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     71 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
     72 ; AVX1-NEXT:    retq
     73 ;
     74 ; AVX2-LABEL: Ei:
     75 ; AVX2:       ## BB#0: ## %entry
     76 ; AVX2-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
     77 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
     78 ; AVX2-NEXT:    retq
     79 entry:
     80   ; add forces execution domain
     81   %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
     82   %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
     83   ret <32 x i8> %shuffle
     84 }
     85 
     ; Integer-domain variant of @E2: %a is incremented, then mask <6, 7, 0, 1>
     ; selects the high half of %b followed by the low half of the sum.
     ; The AVX2 checks expect vperm2i128. The AVX1 checks expect a 128-bit vpaddq
     ; (only the low half of the sum is used) followed by vperm2f128.
     86 define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
     87 ; AVX1-LABEL: E2i:
     88 ; AVX1:       ## BB#0: ## %entry
     89 ; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
     90 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
     91 ; AVX1-NEXT:    retq
     92 ;
     93 ; AVX2-LABEL: E2i:
     94 ; AVX2:       ## BB#0: ## %entry
     95 ; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
     96 ; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
     97 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
     98 ; AVX2-NEXT:    retq
     99 entry:
    100   ; add forces execution domain
    101   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
    102   %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
    103   ret <4 x i64> %shuffle
    104 }
    105 
    ; Integer-domain test with undef lanes: after the add, mask
    ; <undef, 5, undef, 7, 12..15> selects the high half of the sum (two lanes
    ; undef) followed by the high half of %b.
    ; The AVX2 checks expect vperm2i128 and the AVX1 checks expect vperm2f128.
    106 define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
    107 ; AVX1-LABEL: E3i:
    108 ; AVX1:       ## BB#0: ## %entry
    109 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    110 ; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
    111 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    112 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    113 ; AVX1-NEXT:    retq
    114 ;
    115 ; AVX2-LABEL: E3i:
    116 ; AVX2:       ## BB#0: ## %entry
    117 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
    118 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
    119 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    120 ; AVX2-NEXT:    retq
    121 entry:
    122   ; add forces execution domain
    123   %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    124   %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
    125   ret <8 x i32> %shuffle
    126 }
    127 
    ; After the add, mask <16..23, 0..7> puts the low half of %b in the result's
    ; low half and the low half of the sum in the high half.
    ; The expected code is a 128-bit insert rather than a vperm2x128:
    ; vinsertf128 under AVX1 and vinserti128 under AVX2.
    128 define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
    129 ; AVX1-LABEL: E4i:
    130 ; AVX1:       ## BB#0: ## %entry
    131 ; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
    132 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    133 ; AVX1-NEXT:    retq
    134 ;
    135 ; AVX2-LABEL: E4i:
    136 ; AVX2:       ## BB#0: ## %entry
    137 ; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
    138 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    139 ; AVX2-NEXT:    retq
    140 entry:
    141   ; add forces execution domain
    142   %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    143   %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    144   ret <16 x i16> %shuffle
    145 }
    146 
    ; Same add and shuffle as @E4i (mask <16..23, 0..7>), but both vector
    ; operands are loaded through the pointer arguments.
    ; The checks expect both loads to stay as separate full-width moves, followed
    ; by the add and a 128-bit insert.
    147 define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
    148 ; AVX1-LABEL: E5i:
    149 ; AVX1:       ## BB#0: ## %entry
    150 ; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
    151 ; AVX1-NEXT:    vmovaps (%rsi), %ymm1
    152 ; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
    153 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    154 ; AVX1-NEXT:    retq
    155 ;
    156 ; AVX2-LABEL: E5i:
    157 ; AVX2:       ## BB#0: ## %entry
    158 ; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
    159 ; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
    160 ; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
    161 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    162 ; AVX2-NEXT:    retq
    163 entry:
    164   %c = load <16 x i16>, <16 x i16>* %a
    165   %d = load <16 x i16>, <16 x i16>* %b
    166   %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    167   %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    168   ret <16 x i16> %shuffle
    169 }
    170 
    171 ;;;; Cases with undef indices mixed in the mask
    172 
    ; Mask <undef, undef, 6, 7, undef, 9, undef, 11>: the defined lanes are
    ; consistent with "high half of %a, then low half of %b", so a single
    ; vperm2f128 is expected.
    173 define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    174 ; ALL-LABEL: F:
    175 ; ALL:       ## BB#0: ## %entry
    176 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
    177 ; ALL-NEXT:    retq
    178 entry:
    179   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
    180   ret <8 x float> %shuffle
    181 }
    182 
    ; Mask <undef, undef, 6, 7, undef, undef, 6, 7>: the defined lanes in both
    ; halves come from the high half of %a, so the expected vperm2f128
    ; duplicates that half.
    183 define <8 x float> @F2(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    184 ; ALL-LABEL: F2:
    185 ; ALL:       ## BB#0: ## %entry
    186 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
    187 ; ALL-NEXT:    retq
    188 entry:
    189   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
    190   ret <8 x float> %shuffle
    191 }
    192 
    ; Mask <undef, undef, 6, 7, undef, undef, 10, 11>: the defined lanes match
    ; "high half of %a, then low half of %b".
    193 define <8 x float> @F3(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    194 ; ALL-LABEL: F3:
    195 ; ALL:       ## BB#0: ## %entry
    196 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
    197 ; ALL-NEXT:    retq
    198 entry:
    199   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11>
    200   ret <8 x float> %shuffle
    201 }
    202 
    ; Mask <undef, undef, 6, 7, undef, undef, 14, 15>: the defined lanes match
    ; "high half of %a, then high half of %b".
    203 define <8 x float> @F4(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    204 ; ALL-LABEL: F4:
    205 ; ALL:       ## BB#0: ## %entry
    206 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    207 ; ALL-NEXT:    retq
    208 entry:
    209   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
    210   ret <8 x float> %shuffle
    211 }
    212 
    ; Mask <undef, undef, 6, 7, 4, 5, 6, 7>: the low half is partly undef and the
    ; high half is fully defined. Both halves come from the high half of %a.
    213 define <8 x float> @F5(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    214 ; ALL-LABEL: F5:
    215 ; ALL:       ## BB#0: ## %entry
    216 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
    217 ; ALL-NEXT:    retq
    218 entry:
    219   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
    220   ret <8 x float> %shuffle
    221 }
    222 
    ; Mask <undef, undef, 6, 7, 8, 9, 10, 11>: the partly undef low half comes
    ; from the high half of %a, and the fully defined high half is the low half
    ; of %b.
    223 define <8 x float> @F6(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    224 ; ALL-LABEL: F6:
    225 ; ALL:       ## BB#0: ## %entry
    226 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
    227 ; ALL-NEXT:    retq
    228 entry:
    229   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
    230   ret <8 x float> %shuffle
    231 }
    232 
    ; Mask <4, 5, 6, 7, undef, undef, 6, 7>: the low half is fully defined and the
    ; high half is partly undef. Both halves come from the high half of %a.
    233 define <8 x float> @F7(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    234 ; ALL-LABEL: F7:
    235 ; ALL:       ## BB#0: ## %entry
    236 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
    237 ; ALL-NEXT:    retq
    238 entry:
    239   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
    240   ret <8 x float> %shuffle
    241 }
    242 
    ; Mask <4, 5, 6, 7, undef, undef, 14, 15>: the fully defined low half is the
    ; high half of %a, and the partly undef high half is the high half of %b.
    243 define <8 x float> @F8(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    244 ; ALL-LABEL: F8:
    245 ; ALL:       ## BB#0: ## %entry
    246 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    247 ; ALL-NEXT:    retq
    248 entry:
    249   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
    250   ret <8 x float> %shuffle
    251 }
    252 
    253 ;;;; Cases where vperm2f128 alone cannot implement the shuffle
    254 
    ; Mask <undef, undef, 6, 7, undef, 12, undef, 15>: result lane 5 takes source
    ; index 12, which is the first element of %b's high half rather than the
    ; second. The shuffle is therefore not a plain copy of 128-bit halves.
    ; The checks expect a vperm2f128 followed by an in-lane vpermilps fix-up.
    255 define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
    256 ; ALL-LABEL: G:
    257 ; ALL:       ## BB#0: ## %entry
    258 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    259 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
    260 ; ALL-NEXT:    retq
    261 entry:
    262   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
    263   ret <8 x float> %shuffle
    264 }
    265 
    266 ;; Test zero mask generation.
    267 ;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
    268 ;; Prefer xor+vblendpd over vperm2f128 where possible, because the xor+blend sequence has better performance.
    269 
    ; Shuffles %a with a constant whose low half is zero, using mask <4, 5, 0, 1>:
    ; the result's low half is zero and its high half is the low half of %a.
    ; The non-zero half changes position, so the checks expect a vperm2f128 with
    ; a zeroed low half.
    270 define <4 x double> @vperm2z_0x08(<4 x double> %a) {
    271 ; ALL-LABEL: vperm2z_0x08:
    272 ; ALL:       ## BB#0:
    273 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
    274 ; ALL-NEXT:    retq
    275   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    276   ret <4 x double> %s
    277 }
    278 
    ; Mask <4, 5, 2, 3> against a zero-low-half constant: the result's low half
    ; is zero and its high half is the high half of %a, left in place.
    ; The checks expect vxorpd + vblendpd instead of vperm2f128.
    279 define <4 x double> @vperm2z_0x18(<4 x double> %a) {
    280 ; ALL-LABEL: vperm2z_0x18:
    281 ; ALL:       ## BB#0:
    282 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
    283 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
    284 ; ALL-NEXT:    retq
    285   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
    286   ret <4 x double> %s
    287 }
    288 
    ; Operand order is swapped (zero constant first, %a second). Mask <0, 1, 4, 5>
    ; gives a zero low half and the low half of %a in the high half.
    ; The checks expect the same vperm2f128 as @vperm2z_0x08.
    289 define <4 x double> @vperm2z_0x28(<4 x double> %a) {
    290 ; ALL-LABEL: vperm2z_0x28:
    291 ; ALL:       ## BB#0:
    292 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
    293 ; ALL-NEXT:    retq
    294   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    295   ret <4 x double> %s
    296 }
    297 
    ; Zero constant first, %a second. Mask <0, 1, 6, 7> gives a zero low half and
    ; keeps the high half of %a in place.
    ; The checks expect vxorpd + vblendpd.
    298 define <4 x double> @vperm2z_0x38(<4 x double> %a) {
    299 ; ALL-LABEL: vperm2z_0x38:
    300 ; ALL:       ## BB#0:
    301 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
    302 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
    303 ; ALL-NEXT:    retq
    304   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
    305   ret <4 x double> %s
    306 }
    307 
    ; Mask <0, 1, 4, 5> with %a first: the low half of %a stays in place and the
    ; high half of the result is zero.
    ; The checks expect vxorpd + vblendpd.
    308 define <4 x double> @vperm2z_0x80(<4 x double> %a) {
    309 ; ALL-LABEL: vperm2z_0x80:
    310 ; ALL:       ## BB#0:
    311 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
    312 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    313 ; ALL-NEXT:    retq
    314   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    315   ret <4 x double> %s
    316 }
    317 
    ; Mask <2, 3, 4, 5> with %a first: the high half of %a moves to the low half
    ; of the result and the high half is zero.
    ; The non-zero half changes position, so the checks expect a vperm2f128 with
    ; a zeroed high half.
    318 define <4 x double> @vperm2z_0x81(<4 x double> %a) {
    319 ; ALL-LABEL: vperm2z_0x81:
    320 ; ALL:       ## BB#0:
    321 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
    322 ; ALL-NEXT:    retq
    323   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
    324   ret <4 x double> %s
    325 }
    326 
    ; Zero constant first, %a second. Mask <4, 5, 0, 1> keeps the low half of %a
    ; in the low half and zeroes the high half.
    ; The checks expect vxorpd + vblendpd.
    327 define <4 x double> @vperm2z_0x82(<4 x double> %a) {
    328 ; ALL-LABEL: vperm2z_0x82:
    329 ; ALL:       ## BB#0:
    330 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
    331 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    332 ; ALL-NEXT:    retq
    333   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    334   ret <4 x double> %s
    335 }
    336 
    ; Zero constant first, %a second. Mask <6, 7, 0, 1> moves the high half of %a
    ; to the low half and zeroes the high half.
    ; The checks expect the same vperm2f128 as @vperm2z_0x81.
    337 define <4 x double> @vperm2z_0x83(<4 x double> %a) {
    338 ; ALL-LABEL: vperm2z_0x83:
    339 ; ALL:       ## BB#0:
    340 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
    341 ; ALL-NEXT:    retq
    342   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
    343   ret <4 x double> %s
    344 }
    345 
    346 ;; With AVX2, select the integer version of the instruction (vperm2i128). Use an add to force the integer execution domain.
    347 
    ; Integer (<4 x i64>) counterpart of @vperm2z_0x83: the high half of %a moves
    ; to the low half, the high half is zeroed, and the result is added to %b.
    ; The AVX2 checks expect vperm2i128 followed by a 256-bit vpaddq. The AVX1
    ; checks expect vperm2f128 and the add split into two 128-bit vpaddq.
    348 define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
    349 ; AVX1-LABEL: vperm2z_int_0x83:
    350 ; AVX1:       ## BB#0:
    351 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
    352 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    353 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    354 ; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
    355 ; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
    356 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    357 ; AVX1-NEXT:    retq
    358 ;
    359 ; AVX2-LABEL: vperm2z_int_0x83:
    360 ; AVX2:       ## BB#0:
    361 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
    362 ; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    363 ; AVX2-NEXT:    retq
    364   %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
    365   %c = add <4 x i64> %b, %s
    366   ret <4 x i64> %c
    367 }
    368 
    369