Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl| FileCheck %s
      2 
      3 ; 256-bit
      4 
      5 ; CHECK-LABEL: vpaddq256_test
      6 ; CHECK: vpaddq %ymm{{.*}}
      7 ; CHECK: ret
      8 define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
          ; Lane-wise add of the two <4 x i64> arguments; no memory operand is involved.
      9   %sum = add <4 x i64> %i, %j
     10   ret <4 x i64> %sum
     11 }
     12 
     13 ; CHECK-LABEL: vpaddq256_fold_test
     14 ; CHECK: vpaddq (%rdi), %ymm{{.*}}
     15 ; CHECK: ret
     16 define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
          ; The second addend is loaded through %j (declared only 4-byte aligned) and used once by the add.
     17   %vec = load <4 x i64>, <4 x i64>* %j, align 4
     18   %sum = add <4 x i64> %i, %vec
     19   ret <4 x i64> %sum
     20 }
     21 
     22 ; CHECK-LABEL: vpaddq256_broadcast_test
     23 ; CHECK: vpaddq LCP{{.*}}(%rip){1to4}, %ymm{{.*}}
     24 ; CHECK: ret
     25 define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
          ; Adds the constant 1 to every i64 lane of %i (all four constant lanes are identical).
     26   %sum = add <4 x i64> %i, <i64 1, i64 1, i64 1, i64 1>
     27   ret <4 x i64> %sum
     28 }
     29 
     30 ; CHECK-LABEL: vpaddq256_broadcast2_test
     31 ; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
     32 ; CHECK: ret
     33 define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
          ; Loads one i64, replicates it to all four lanes (insert into lane 0, then an all-zero
          ; shuffle mask), and adds the resulting splat to %i.
     34   %scalar = load i64, i64* %j.ptr
     35   %lane0 = insertelement <4 x i64> undef, i64 %scalar, i32 0
     36   %splat = shufflevector <4 x i64> %lane0, <4 x i64> undef, <4 x i32> zeroinitializer
     37   %sum = add <4 x i64> %i, %splat
     38   ret <4 x i64> %sum
     39 }
     40 
     41 ; CHECK-LABEL: vpaddd256_test
     42 ; CHECK: vpaddd %ymm{{.*}}
     43 ; CHECK: ret
     44 define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
          ; Lane-wise add of the two <8 x i32> arguments; no memory operand is involved.
     45   %sum = add <8 x i32> %i, %j
     46   ret <8 x i32> %sum
     47 }
     48 
     49 ; CHECK-LABEL: vpaddd256_fold_test
     50 ; CHECK: vpaddd (%rdi), %ymm{{.*}}
     51 ; CHECK: ret
     52 define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
          ; The second addend is loaded through %j (declared only 4-byte aligned) and used once by the add.
     53   %vec = load <8 x i32>, <8 x i32>* %j, align 4
     54   %sum = add <8 x i32> %i, %vec
     55   ret <8 x i32> %sum
     56 }
     57 
     58 ; CHECK-LABEL: vpaddd256_broadcast_test
     59 ; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*}}
     60 ; CHECK: ret
     61 define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
          ; Adds the constant 1 to every i32 lane of %i (all eight constant lanes are identical).
     62   %sum = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
     63   ret <8 x i32> %sum
     64 }
     65 
     66 ; CHECK-LABEL: vpaddd256_mask_test
     67 ; CHECK: vpaddd %ymm{{.*%k[1-7].*}}
     68 ; CHECK: ret
     69 define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
          ; Merge form: lanes whose %mask1 element is non-zero receive %i + %j, the others keep %i.
     70   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
     71   %sum = add <8 x i32> %i, %j
     72   %res = select <8 x i1> %pred, <8 x i32> %sum, <8 x i32> %i
     73   ret <8 x i32> %res
     74 }
     75 
     76 ; CHECK-LABEL: vpaddd256_maskz_test
     77 ; CHECK: vpaddd %ymm{{.*{%k[1-7]} {z}.*}}
     78 ; CHECK: ret
     79 define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
          ; Zeroing form: lanes whose %mask1 element is non-zero receive %i + %j, the others become 0.
     80   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
     81   %sum = add <8 x i32> %i, %j
     82   %res = select <8 x i1> %pred, <8 x i32> %sum, <8 x i32> zeroinitializer
     83   ret <8 x i32> %res
     84 }
     85 
     86 ; CHECK-LABEL: vpaddd256_mask_fold_test
     87 ; CHECK: vpaddd (%rdi), %ymm{{.*%k[1-7]}}
     88 ; CHECK: ret
     89 define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
          ; Merge-masked add whose second operand is loaded through %j.ptr: lanes with a non-zero
          ; %mask1 element receive %i + loaded value, the others keep %i.
          ; The function is deliberately not marked readnone: it dereferences %j.ptr, and a
          ; readnone function that reads memory has undefined behavior.
     90   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
     91   %j = load <8 x i32>, <8 x i32>* %j.ptr
     92   %x = add <8 x i32> %i, %j
     93   %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
     94   ret <8 x i32> %r
     95 }
     96 
     97 ; CHECK-LABEL: vpaddd256_mask_broadcast_test
     98 ; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]}}}
     99 ; CHECK: ret
    100 define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
          ; Merge-masked add of the splat constant 1: unselected lanes keep %i.
    101   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    102   %sum = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    103   %res = select <8 x i1> %pred, <8 x i32> %sum, <8 x i32> %i
    104   ret <8 x i32> %res
    105 }
    106 
    107 ; CHECK-LABEL: vpaddd256_maskz_fold_test
    108 ; CHECK: vpaddd (%rdi), %ymm{{.*{%k[1-7]} {z}}}
    109 ; CHECK: ret
    110 define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
          ; Zero-masked add whose second operand is loaded through %j.ptr: lanes with a non-zero
          ; %mask1 element receive %i + loaded value, the others become 0.
          ; The function is deliberately not marked readnone: it dereferences %j.ptr, and a
          ; readnone function that reads memory has undefined behavior.
    111   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    112   %j = load <8 x i32>, <8 x i32>* %j.ptr
    113   %x = add <8 x i32> %i, %j
    114   %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    115   ret <8 x i32> %r
    116 }
    117 
    118 ; CHECK-LABEL: vpaddd256_maskz_broadcast_test
    119 ; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]} {z}}}
    120 ; CHECK: ret
    121 define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
          ; Zero-masked add of the splat constant 1: unselected lanes become 0.
    122   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    123   %sum = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    124   %res = select <8 x i1> %pred, <8 x i32> %sum, <8 x i32> zeroinitializer
    125   ret <8 x i32> %res
    126 }
    127 
    128 ; CHECK-LABEL: vpsubq256_test
    129 ; CHECK: vpsubq %ymm{{.*}}
    130 ; CHECK: ret
    131 define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
          ; Lane-wise %i - %j on <4 x i64>.
    132   %diff = sub <4 x i64> %i, %j
    133   ret <4 x i64> %diff
    134 }
    135 
    136 ; CHECK-LABEL: vpsubd256_test
    137 ; CHECK: vpsubd %ymm{{.*}}
    138 ; CHECK: ret
    139 define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
          ; Lane-wise %i - %j on <8 x i32>.
    140   %diff = sub <8 x i32> %i, %j
    141   ret <8 x i32> %diff
    142 }
    143 
    144 ; CHECK-LABEL: vpmulld256_test
    145 ; CHECK: vpmulld %ymm{{.*}}
    146 ; CHECK: ret
    147 define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
          ; Lane-wise multiply of two <8 x i32> vectors (result truncated to 32 bits per lane).
    148   %prod = mul <8 x i32> %i, %j
    149   ret <8 x i32> %prod
    150 }
    151 
    152 ; CHECK-LABEL: test_vaddpd_256
    153 ; CHECK: vaddpd{{.*}}
    154 ; CHECK: ret
    155 define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
          ; Lane-wise double-precision add; note the operands are written as %x + %y.
    156 entry:
    157   %sum = fadd <4 x double> %x, %y
    158   ret <4 x double> %sum
    159 }
    160 
    161 ; CHECK-LABEL: test_fold_vaddpd_256
    162 ; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
    163 ; CHECK: ret
    164 define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
          ; Adds a constant vector whose lanes are NOT all equal (4.5, 3.4, 4.5, 5.6), so the
          ; constant cannot be expressed as a single splatted scalar.
    165 entry:
    166   %sum = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
    167   ret <4 x double> %sum
    168 }
    169 
    170 ; CHECK-LABEL: test_broadcast_vaddpd_256
    171 ; CHECK: vaddps LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
    172 ; CHECK: ret
    173 define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
          ; Adds the same float constant to every lane of %a.
          ; NOTE: despite "vaddpd" in the name, the operands are <8 x float>, so this is a
          ; single-precision add. The check pattern names the vaddps mnemonic so that an
          ; unrelated instruction with the same operand list cannot satisfy the test.
    174   %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
    175   ret <8 x float> %b
    176 }
    177 
    178 ; CHECK-LABEL: test_mask_vaddps_256
    179 ; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    180 ; CHECK: ret
    181 define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
    182                                         <8 x float> %j, <8 x i32> %mask1)
    183                                         nounwind readnone {
          ; Merge-masked %i + %j: lanes with a zero %mask1 element keep the value from %dst.
    184   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    185   %sum = fadd <8 x float> %i, %j
    186   %res = select <8 x i1> %pred, <8 x float> %sum, <8 x float> %dst
    187   ret <8 x float> %res
    188 }
    189 
    190 ; CHECK-LABEL: test_mask_vmulps_256
    191 ; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    192 ; CHECK: ret
    193 define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
    194                                         <8 x float> %j, <8 x i32> %mask1)
    195                                         nounwind readnone {
          ; Merge-masked %i * %j: lanes with a zero %mask1 element keep the value from %dst.
    196   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    197   %prod = fmul <8 x float> %i, %j
    198   %res = select <8 x i1> %pred, <8 x float> %prod, <8 x float> %dst
    199   ret <8 x float> %res
    200 }
    201 
    202 ; CHECK-LABEL: test_mask_vminps_256
    203 ; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    204 ; CHECK: ret
    205 define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
    206                                         <8 x float> %j, <8 x i32> %mask1)
    207                                         nounwind readnone {
          ; Minimum written as compare-and-select (%i if %i < %j, ordered, else %j), then
          ; merge-masked into %dst.
    208   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    209   %is_lt = fcmp olt <8 x float> %i, %j
    210   %lo = select <8 x i1> %is_lt, <8 x float> %i, <8 x float> %j
    211   %res = select <8 x i1> %pred, <8 x float> %lo, <8 x float> %dst
    212   ret <8 x float> %res
    213 }
    214 
    215 ; CHECK-LABEL: test_mask_vmaxps_256
    216 ; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    217 ; CHECK: ret
    218 define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
    219                                         <8 x float> %j, <8 x i32> %mask1)
    220                                         nounwind readnone {
          ; Maximum written as compare-and-select (%i if %i > %j, ordered, else %j), then
          ; merge-masked into %dst.
    221   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    222   %is_gt = fcmp ogt <8 x float> %i, %j
    223   %hi = select <8 x i1> %is_gt, <8 x float> %i, <8 x float> %j
    224   %res = select <8 x i1> %pred, <8 x float> %hi, <8 x float> %dst
    225   ret <8 x float> %res
    226 }
    227 
    228 ; CHECK-LABEL: test_mask_vsubps_256
    229 ; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    230 ; CHECK: ret
    231 define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
    232                                         <8 x float> %j, <8 x i32> %mask1)
    233                                         nounwind readnone {
          ; Merge-masked %i - %j: lanes with a zero %mask1 element keep the value from %dst.
    234   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    235   %diff = fsub <8 x float> %i, %j
    236   %res = select <8 x i1> %pred, <8 x float> %diff, <8 x float> %dst
    237   ret <8 x float> %res
    238 }
    239 
    240 ; CHECK-LABEL: test_mask_vdivps_256
    241 ; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    242 ; CHECK: ret
    243 define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
    244                                         <8 x float> %j, <8 x i32> %mask1)
    245                                         nounwind readnone {
          ; Merge-masked %i / %j: lanes with a zero %mask1 element keep the value from %dst.
    246   %pred = icmp ne <8 x i32> %mask1, zeroinitializer
    247   %quot = fdiv <8 x float> %i, %j
    248   %res = select <8 x i1> %pred, <8 x float> %quot, <8 x float> %dst
    249   ret <8 x float> %res
    250 }
    251 
    252 ; CHECK-LABEL: test_mask_vmulpd_256
    253 ; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    254 ; CHECK: ret
    255 define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
    256                                         <4 x double> %j, <4 x i64> %mask1)
    257                                         nounwind readnone {
          ; Merge-masked %i * %j: lanes with a zero %mask1 element keep the value from %dst.
    258   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    259   %prod = fmul <4 x double> %i, %j
    260   %res = select <4 x i1> %pred, <4 x double> %prod, <4 x double> %dst
    261   ret <4 x double> %res
    262 }
    263 
    264 ; CHECK-LABEL: test_mask_vminpd_256
    265 ; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    266 ; CHECK: ret
    267 define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
    268                                         <4 x double> %j, <4 x i64> %mask1)
    269                                         nounwind readnone {
          ; Minimum written as compare-and-select (%i if %i < %j, ordered, else %j), then
          ; merge-masked into %dst.
    270   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    271   %is_lt = fcmp olt <4 x double> %i, %j
    272   %lo = select <4 x i1> %is_lt, <4 x double> %i, <4 x double> %j
    273   %res = select <4 x i1> %pred, <4 x double> %lo, <4 x double> %dst
    274   ret <4 x double> %res
    275 }
    276 
    277 ; CHECK-LABEL: test_mask_vmaxpd_256
    278 ; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    279 ; CHECK: ret
    280 define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
    281                                         <4 x double> %j, <4 x i64> %mask1)
    282                                         nounwind readnone {
          ; Maximum written as compare-and-select (%i if %i > %j, ordered, else %j), then
          ; merge-masked into %dst.
    283   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    284   %is_gt = fcmp ogt <4 x double> %i, %j
    285   %hi = select <4 x i1> %is_gt, <4 x double> %i, <4 x double> %j
    286   %res = select <4 x i1> %pred, <4 x double> %hi, <4 x double> %dst
    287   ret <4 x double> %res
    288 }
    289 
    290 ; CHECK-LABEL: test_mask_vsubpd_256
    291 ; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    292 ; CHECK: ret
    293 define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
    294                                         <4 x double> %j, <4 x i64> %mask1)
    295                                         nounwind readnone {
          ; Merge-masked %i - %j: lanes with a zero %mask1 element keep the value from %dst.
    296   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    297   %diff = fsub <4 x double> %i, %j
    298   %res = select <4 x i1> %pred, <4 x double> %diff, <4 x double> %dst
    299   ret <4 x double> %res
    300 }
    301 
    302 ; CHECK-LABEL: test_mask_vdivpd_256
    303 ; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    304 ; CHECK: ret
    305 define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
    306                                         <4 x double> %j, <4 x i64> %mask1)
    307                                         nounwind readnone {
          ; Merge-masked %i / %j: lanes with a zero %mask1 element keep the value from %dst.
    308   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    309   %quot = fdiv <4 x double> %i, %j
    310   %res = select <4 x i1> %pred, <4 x double> %quot, <4 x double> %dst
    311   ret <4 x double> %res
    312 }
    313 
    314 ; CHECK-LABEL: test_mask_vaddpd_256
    315 ; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
    316 ; CHECK: ret
    317 define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
    318                                          <4 x double> %j, <4 x i64> %mask1)
    319                                          nounwind readnone {
          ; Merge-masked %i + %j: lanes with a zero %mask1 element keep the value from %dst.
    320   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    321   %sum = fadd <4 x double> %i, %j
    322   %res = select <4 x i1> %pred, <4 x double> %sum, <4 x double> %dst
    323   ret <4 x double> %res
    324 }
    325 
    326 ; CHECK-LABEL: test_maskz_vaddpd_256
    327 ; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
    328 ; CHECK: ret
    329 define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
    330                                           <4 x i64> %mask1) nounwind readnone {
          ; Zero-masked %i + %j: lanes with a zero %mask1 element become 0.0.
    331   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    332   %sum = fadd <4 x double> %i, %j
    333   %res = select <4 x i1> %pred, <4 x double> %sum, <4 x double> zeroinitializer
    334   ret <4 x double> %res
    335 }
    336 
    337 ; CHECK-LABEL: test_mask_fold_vaddpd_256
    338 ; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
    339 ; CHECK: ret
    340 define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
    341                                          <4 x double>* %j,  <4 x i64> %mask1)
    342                                          nounwind {
          ; Merge-masked add whose second operand is a full-vector load through %j;
          ; unselected lanes keep the value from %dst.
    343   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    344   %vec = load <4 x double>, <4 x double>* %j
    345   %sum = fadd <4 x double> %i, %vec
    346   %res = select <4 x i1> %pred, <4 x double> %sum, <4 x double> %dst
    347   ret <4 x double> %res
    348 }
    349 
    350 ; CHECK-LABEL: test_maskz_fold_vaddpd_256
    351 ; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
    352 ; CHECK: ret
    353 define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
    354                                           <4 x i64> %mask1) nounwind {
          ; Zero-masked add whose second operand is a full-vector load through %j;
          ; unselected lanes become 0.0.
    355   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    356   %vec = load <4 x double>, <4 x double>* %j
    357   %sum = fadd <4 x double> %i, %vec
    358   %res = select <4 x i1> %pred, <4 x double> %sum, <4 x double> zeroinitializer
    359   ret <4 x double> %res
    360 }
    361 
    362 ; CHECK-LABEL: test_broadcast2_vaddpd_256
    363 ; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
    364 ; CHECK: ret
    365 define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
          ; Loads one double, replicates it to all four lanes (insert into lane 0, then an
          ; all-zero shuffle mask), and adds %i to the splat (splat is the first fadd operand).
    366   %scalar = load double, double* %j
    367   %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
    368   %splat = shufflevector <4 x double> %lane0, <4 x double> undef,
    369                      <4 x i32> zeroinitializer
    370   %sum = fadd <4 x double> %splat, %i
    371   ret <4 x double> %sum
    372 }
    373 
    374 ; CHECK-LABEL: test_mask_broadcast_vaddpd_256
    375 ; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
    376 ; CHECK: ret
    377 define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
    378                                           double* %j, <4 x i64> %mask1) nounwind {
          ; Merge-masked add of %i and a scalar double loaded through %j and replicated to all
          ; four lanes (insert into lane 0, then an all-zero shuffle mask).
          ; Unselected lanes take their value from %dst, matching the other merge-masked tests
          ; in this file; previously %dst was unused and %i served as the pass-through.
    379   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    380   %tmp = load double, double* %j
    381   %b = insertelement <4 x double> undef, double %tmp, i32 0
    382   %c = shufflevector <4 x double> %b, <4 x double> undef,
    383                      <4 x i32> zeroinitializer
    384   %x = fadd <4 x double> %c, %i
    385   %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
    386   ret <4 x double> %r
    387 }
    388 
    389 ; CHECK-LABEL: test_maskz_broadcast_vaddpd_256
    390 ; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
    391 ; CHECK: ret
    392 define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
    393                                            <4 x i64> %mask1) nounwind {
          ; Zero-masked add of %i and a scalar double loaded through %j and replicated to all
          ; four lanes; unselected lanes become 0.0.
    394   %pred = icmp ne <4 x i64> %mask1, zeroinitializer
    395   %scalar = load double, double* %j
    396   %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
    397   %splat = shufflevector <4 x double> %lane0, <4 x double> undef,
    398                      <4 x i32> zeroinitializer
    399   %sum = fadd <4 x double> %splat, %i
    400   %res = select <4 x i1> %pred, <4 x double> %sum, <4 x double> zeroinitializer
    401   ret <4 x double> %res
    402 }
    403 
    404 ; 128-bit
    405 
    406 ; CHECK-LABEL: vpaddq128_test
    407 ; CHECK: vpaddq %xmm{{.*}}
    408 ; CHECK: ret
    409 define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
          ; Lane-wise add of the two <2 x i64> arguments; no memory operand is involved.
    410   %sum = add <2 x i64> %i, %j
    411   ret <2 x i64> %sum
    412 }
    413 
    414 ; CHECK-LABEL: vpaddq128_fold_test
    415 ; CHECK: vpaddq (%rdi), %xmm{{.*}}
    416 ; CHECK: ret
    417 define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
          ; The second addend is loaded through %j (declared only 4-byte aligned) and used once by the add.
    418   %vec = load <2 x i64>, <2 x i64>* %j, align 4
    419   %sum = add <2 x i64> %i, %vec
    420   ret <2 x i64> %sum
    421 }
    422 
    423 ; CHECK-LABEL: vpaddq128_broadcast2_test
    424 ; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
    425 ; CHECK: ret
    426 define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
          ; Loads one i64 and places it in both lanes with two insertelement steps (no shuffle),
          ; then adds the resulting splat to %i.
    427   %scalar = load i64, i64* %j
    428   %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
    429   %splat = insertelement <2 x i64> %lane0, i64 %scalar, i32 1
    430   %sum = add <2 x i64> %i, %splat
    431   ret <2 x i64> %sum
    432 }
    433 
    434 ; CHECK-LABEL: vpaddd128_test
    435 ; CHECK: vpaddd %xmm{{.*}}
    436 ; CHECK: ret
    437 define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
          ; Lane-wise add of the two <4 x i32> arguments; no memory operand is involved.
    438   %sum = add <4 x i32> %i, %j
    439   ret <4 x i32> %sum
    440 }
    441 
    442 ; CHECK-LABEL: vpaddd128_fold_test
    443 ; CHECK: vpaddd (%rdi), %xmm{{.*}}
    444 ; CHECK: ret
    445 define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
          ; The second addend is loaded through %j (declared only 4-byte aligned) and used once by the add.
    446   %vec = load <4 x i32>, <4 x i32>* %j, align 4
    447   %sum = add <4 x i32> %i, %vec
    448   ret <4 x i32> %sum
    449 }
    450 
    451 ; CHECK-LABEL: vpaddd128_broadcast_test
    452 ; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*}}
    453 ; CHECK: ret
    454 define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
          ; Adds the constant 1 to every i32 lane of %i (all four constant lanes are identical).
    455   %sum = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
    456   ret <4 x i32> %sum
    457 }
    458 
    459 ; CHECK-LABEL: vpaddd128_mask_test
    460 ; CHECK: vpaddd %xmm{{.*%k[1-7].*}}
    461 ; CHECK: ret
    462 define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
          ; Merge form: lanes whose %mask1 element is non-zero receive %i + %j, the others keep %i.
    463   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    464   %sum = add <4 x i32> %i, %j
    465   %res = select <4 x i1> %pred, <4 x i32> %sum, <4 x i32> %i
    466   ret <4 x i32> %res
    467 }
    468 
    469 ; CHECK-LABEL: vpaddd128_maskz_test
    470 ; CHECK: vpaddd %xmm{{.*{%k[1-7]} {z}.*}}
    471 ; CHECK: ret
    472 define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
          ; Zeroing form: lanes whose %mask1 element is non-zero receive %i + %j, the others become 0.
    473   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    474   %sum = add <4 x i32> %i, %j
    475   %res = select <4 x i1> %pred, <4 x i32> %sum, <4 x i32> zeroinitializer
    476   ret <4 x i32> %res
    477 }
    478 
    479 ; CHECK-LABEL: vpaddd128_mask_fold_test
    480 ; CHECK: vpaddd (%rdi), %xmm{{.*%k[1-7]}}
    481 ; CHECK: ret
    482 define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
          ; Merge-masked add whose second operand is loaded through %j.ptr: lanes with a non-zero
          ; %mask1 element receive %i + loaded value, the others keep %i.
          ; The function is deliberately not marked readnone: it dereferences %j.ptr, and a
          ; readnone function that reads memory has undefined behavior.
    483   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    484   %j = load <4 x i32>, <4 x i32>* %j.ptr
    485   %x = add <4 x i32> %i, %j
    486   %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
    487   ret <4 x i32> %r
    488 }
    489 
    490 ; CHECK-LABEL: vpaddd128_mask_broadcast_test
    491 ; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]}}}
    492 ; CHECK: ret
    493 define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
          ; Merge-masked add of the splat constant 1: unselected lanes keep %i.
    494   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    495   %sum = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
    496   %res = select <4 x i1> %pred, <4 x i32> %sum, <4 x i32> %i
    497   ret <4 x i32> %res
    498 }
    499 
    500 ; CHECK-LABEL: vpaddd128_maskz_fold_test
    501 ; CHECK: vpaddd (%rdi), %xmm{{.*{%k[1-7]} {z}}}
    502 ; CHECK: ret
    503 define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
          ; Zero-masked add whose second operand is loaded through %j.ptr: lanes with a non-zero
          ; %mask1 element receive %i + loaded value, the others become 0.
          ; The function is deliberately not marked readnone: it dereferences %j.ptr, and a
          ; readnone function that reads memory has undefined behavior.
    504   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    505   %j = load <4 x i32>, <4 x i32>* %j.ptr
    506   %x = add <4 x i32> %i, %j
    507   %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
    508   ret <4 x i32> %r
    509 }
    510 
    511 ; CHECK-LABEL: vpaddd128_maskz_broadcast_test
    512 ; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]} {z}}}
    513 ; CHECK: ret
    514 define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
          ; Zero-masked add of the splat constant 1: unselected lanes become 0.
    515   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    516   %sum = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
    517   %res = select <4 x i1> %pred, <4 x i32> %sum, <4 x i32> zeroinitializer
    518   ret <4 x i32> %res
    519 }
    520 
    521 ; CHECK-LABEL: vpsubq128_test
    522 ; CHECK: vpsubq %xmm{{.*}}
    523 ; CHECK: ret
    524 define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
          ; Lane-wise %i - %j on <2 x i64>.
    525   %diff = sub <2 x i64> %i, %j
    526   ret <2 x i64> %diff
    527 }
    528 
    529 ; CHECK-LABEL: vpsubd128_test
    530 ; CHECK: vpsubd %xmm{{.*}}
    531 ; CHECK: ret
    532 define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
          ; Lane-wise %i - %j on <4 x i32>.
    533   %diff = sub <4 x i32> %i, %j
    534   ret <4 x i32> %diff
    535 }
    536 
    537 ; CHECK-LABEL: vpmulld128_test
    538 ; CHECK: vpmulld %xmm{{.*}}
    539 ; CHECK: ret
    540 define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
          ; Lane-wise multiply of two <4 x i32> vectors (result truncated to 32 bits per lane).
    541   %prod = mul <4 x i32> %i, %j
    542   ret <4 x i32> %prod
    543 }
    544 
    545 ; CHECK-LABEL: test_vaddpd_128
    546 ; CHECK: vaddpd{{.*}}
    547 ; CHECK: ret
    548 define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
          ; Lane-wise double-precision add; note the operands are written as %x + %y.
    549 entry:
    550   %sum = fadd <2 x double> %x, %y
    551   ret <2 x double> %sum
    552 }
    553 
    554 ; CHECK-LABEL: test_fold_vaddpd_128
    555 ; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
    556 ; CHECK: ret
    557 define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
          ; Adds a constant vector whose two lanes differ (4.5, 3.4), so the constant cannot be
          ; expressed as a single splatted scalar.
    558 entry:
    559   %sum = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
    560   ret <2 x double> %sum
    561 }
    562 
    563 ; CHECK-LABEL: test_broadcast_vaddpd_128
    564 ; CHECK: vaddps LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
    565 ; CHECK: ret
    566 define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
          ; Adds the same float constant to every lane of %a.
          ; NOTE: despite "vaddpd" in the name, the operands are <4 x float>, so this is a
          ; single-precision add. The check pattern names the vaddps mnemonic so that an
          ; unrelated instruction with the same operand list cannot satisfy the test.
    567   %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
    568   ret <4 x float> %b
    569 }
    570 
    571 ; CHECK-LABEL: test_mask_vaddps_128
    572 ; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    573 ; CHECK: ret
    574 define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
    575                                         <4 x float> %j, <4 x i32> %mask1)
    576                                         nounwind readnone {
          ; Merge-masked %i + %j: lanes with a zero %mask1 element keep the value from %dst.
    577   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    578   %sum = fadd <4 x float> %i, %j
    579   %res = select <4 x i1> %pred, <4 x float> %sum, <4 x float> %dst
    580   ret <4 x float> %res
    581 }
    582 
    583 ; CHECK-LABEL: test_mask_vmulps_128
    584 ; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    585 ; CHECK: ret
    586 define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
    587                                         <4 x float> %j, <4 x i32> %mask1)
    588                                         nounwind readnone {
          ; Merge-masked %i * %j: lanes with a zero %mask1 element keep the value from %dst.
    589   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    590   %prod = fmul <4 x float> %i, %j
    591   %res = select <4 x i1> %pred, <4 x float> %prod, <4 x float> %dst
    592   ret <4 x float> %res
    593 }
    594 
    595 ; CHECK-LABEL: test_mask_vminps_128
    596 ; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    597 ; CHECK: ret
    598 define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
    599                                         <4 x float> %j, <4 x i32> %mask1)
    600                                         nounwind readnone {
          ; Minimum written as compare-and-select (%i if %i < %j, ordered, else %j), then
          ; merge-masked into %dst.
    601   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    602   %is_lt = fcmp olt <4 x float> %i, %j
    603   %lo = select <4 x i1> %is_lt, <4 x float> %i, <4 x float> %j
    604   %res = select <4 x i1> %pred, <4 x float> %lo, <4 x float> %dst
    605   ret <4 x float> %res
    606 }
    607 
    608 ; CHECK-LABEL: test_mask_vmaxps_128
    609 ; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    610 ; CHECK: ret
    611 define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
    612                                         <4 x float> %j, <4 x i32> %mask1)
    613                                         nounwind readnone {
          ; Maximum written as compare-and-select (%i if %i > %j, ordered, else %j), then
          ; merge-masked into %dst.
    614   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    615   %is_gt = fcmp ogt <4 x float> %i, %j
    616   %hi = select <4 x i1> %is_gt, <4 x float> %i, <4 x float> %j
    617   %res = select <4 x i1> %pred, <4 x float> %hi, <4 x float> %dst
    618   ret <4 x float> %res
    619 }
    620 
    621 ; CHECK-LABEL: test_mask_vsubps_128
    622 ; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    623 ; CHECK: ret
    624 define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
    625                                         <4 x float> %j, <4 x i32> %mask1)
    626                                         nounwind readnone {
          ; Merge-masked %i - %j: lanes with a zero %mask1 element keep the value from %dst.
    627   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    628   %diff = fsub <4 x float> %i, %j
    629   %res = select <4 x i1> %pred, <4 x float> %diff, <4 x float> %dst
    630   ret <4 x float> %res
    631 }
    632 
    633 
    634 ; CHECK-LABEL: test_mask_vdivps_128
    635 ; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    636 ; CHECK: ret
    637 define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
    638                                         <4 x float> %j, <4 x i32> %mask1)
    639                                         nounwind readnone {
          ; Merge-masked %i / %j: lanes with a zero %mask1 element keep the value from %dst.
    640   %pred = icmp ne <4 x i32> %mask1, zeroinitializer
    641   %quot = fdiv <4 x float> %i, %j
    642   %res = select <4 x i1> %pred, <4 x float> %quot, <4 x float> %dst
    643   ret <4 x float> %res
    644 }
    645 
    646 ; CHECK-LABEL: test_mask_vmulpd_128
    647 ; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    648 ; CHECK: ret
    649 define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
    650                                         <2 x double> %j, <2 x i64> %mask1)
    651                                         nounwind readnone {
          ; Merge-masked %i * %j: lanes with a zero %mask1 element keep the value from %dst.
    652   %pred = icmp ne <2 x i64> %mask1, zeroinitializer
    653   %prod = fmul <2 x double> %i, %j
    654   %res = select <2 x i1> %pred, <2 x double> %prod, <2 x double> %dst
    655   ret <2 x double> %res
    656 }
    657 
    658 ; CHECK-LABEL: test_mask_vminpd_128
    659 ; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    660 ; CHECK: ret
    661 define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
    662                                         <2 x double> %j, <2 x i64> %mask1)
    663                                         nounwind readnone {
          ; Minimum written as compare-and-select (%i if %i < %j, ordered, else %j), then
          ; merge-masked into %dst.
    664   %pred = icmp ne <2 x i64> %mask1, zeroinitializer
    665   %is_lt = fcmp olt <2 x double> %i, %j
    666   %lo = select <2 x i1> %is_lt, <2 x double> %i, <2 x double> %j
    667   %res = select <2 x i1> %pred, <2 x double> %lo, <2 x double> %dst
    668   ret <2 x double> %res
    669 }
    670 
    671 ; CHECK-LABEL: test_mask_vmaxpd_128
    672 ; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    673 ; CHECK: ret
    674 define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
    675                                         <2 x double> %j, <2 x i64> %mask1)
    676                                         nounwind readnone {
          ; Maximum written as compare-and-select (%i if %i > %j, ordered, else %j), then
          ; merge-masked into %dst.
    677   %pred = icmp ne <2 x i64> %mask1, zeroinitializer
    678   %is_gt = fcmp ogt <2 x double> %i, %j
    679   %hi = select <2 x i1> %is_gt, <2 x double> %i, <2 x double> %j
    680   %res = select <2 x i1> %pred, <2 x double> %hi, <2 x double> %dst
    681   ret <2 x double> %res
    682 }
    683 
    684 ; CHECK-LABEL: test_mask_vsubpd_128
    685 ; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    686 ; CHECK: ret
    687 define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
    688                                         <2 x double> %j, <2 x i64> %mask1)
    689                                         nounwind readnone {
          ; Merge-masked %i - %j: lanes with a zero %mask1 element keep the value from %dst.
    690   %pred = icmp ne <2 x i64> %mask1, zeroinitializer
    691   %diff = fsub <2 x double> %i, %j
    692   %res = select <2 x i1> %pred, <2 x double> %diff, <2 x double> %dst
    693   ret <2 x double> %res
    694 }
    695 
    696 ; CHECK-LABEL: test_mask_vdivpd_128
    697 ; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    698 ; CHECK: ret
    699 define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
    700                                         <2 x double> %j, <2 x i64> %mask1)
    701                                         nounwind readnone { ; %i / %j per lane, merged into %dst under %mask1
    702   %lanes = icmp ne <2 x i64> %mask1, zeroinitializer ; a lane is enabled when its mask element is non-zero
    703   %quot = fdiv <2 x double> %i, %j ; unmasked quotient for both lanes
    704   %merged = select <2 x i1> %lanes, <2 x double> %quot, <2 x double> %dst ; disabled lanes keep %dst
    705   ret <2 x double> %merged
    706 }
    707 
    708 ; CHECK-LABEL: test_mask_vaddpd_128
    709 ; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
    710 ; CHECK: ret
    711 define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
    712                                          <2 x double> %j, <2 x i64> %mask1)
    713                                          nounwind readnone { ; %i + %j per lane, merged into %dst under %mask1
    714   %lanes = icmp ne <2 x i64> %mask1, zeroinitializer ; a lane is enabled when its mask element is non-zero
    715   %sum = fadd <2 x double> %i, %j ; unmasked sum for both lanes
    716   %merged = select <2 x i1> %lanes, <2 x double> %sum, <2 x double> %dst ; disabled lanes keep %dst
    717   ret <2 x double> %merged
    718 }
    719 
    720 ; CHECK-LABEL: test_maskz_vaddpd_128
    721 ; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
    722 ; CHECK: ret
    723 define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
    724                                           <2 x i64> %mask1) nounwind readnone { ; %i + %j per lane, zeroing the lanes disabled by %mask1
    725   %lanes = icmp ne <2 x i64> %mask1, zeroinitializer ; a lane is enabled when its mask element is non-zero
    726   %sum = fadd <2 x double> %i, %j ; unmasked sum for both lanes
    727   %zeroed = select <2 x i1> %lanes, <2 x double> %sum, <2 x double> zeroinitializer ; disabled lanes become 0.0
    728   ret <2 x double> %zeroed
    729 }
    730 
    731 ; CHECK-LABEL: test_mask_fold_vaddpd_128
    732 ; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
    733 ; CHECK: ret
    734 define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
    735                                          <2 x double>* %j,  <2 x i64> %mask1)
    736                                          nounwind { ; %i + *%j per lane, merged into %dst under %mask1
    737   %lanes = icmp ne <2 x i64> %mask1, zeroinitializer ; a lane is enabled when its mask element is non-zero
    738   %mem = load <2 x double>, <2 x double>* %j ; second addend comes from memory; the CHECK line expects it as the (%rdi) operand
    739   %sum = fadd <2 x double> %i, %mem ; unmasked sum for both lanes
    740   %merged = select <2 x i1> %lanes, <2 x double> %sum, <2 x double> %dst ; disabled lanes keep %dst
    741   ret <2 x double> %merged
    742 }
    743 
    744 ; CHECK-LABEL: test_maskz_fold_vaddpd_128
    745 ; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
    746 ; CHECK: ret
    747 define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
    748                                           <2 x i64> %mask1) nounwind { ; %i + *%j per lane, zeroing the lanes disabled by %mask1
    749   %lanes = icmp ne <2 x i64> %mask1, zeroinitializer ; a lane is enabled when its mask element is non-zero
    750   %mem = load <2 x double>, <2 x double>* %j ; second addend comes from memory; the CHECK line expects it as the (%rdi) operand
    751   %sum = fadd <2 x double> %i, %mem ; unmasked sum for both lanes
    752   %zeroed = select <2 x i1> %lanes, <2 x double> %sum, <2 x double> zeroinitializer ; disabled lanes become 0.0
    753   ret <2 x double> %zeroed
    754 }
    755 
    756 ; CHECK-LABEL: test_broadcast2_vaddpd_128
    757 ; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
    758 ; CHECK: ret
    759 define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind { ; adds the scalar at %j to both lanes of %i, no masking
    760   %scalar = load double, double* %j ; single double that is replicated below
    761   %splat.lo = insertelement <2 x double> undef, double %scalar, i64 0 ; lane 0 = scalar
    762   %splat = insertelement <2 x double> %splat.lo, double %scalar, i64 1 ; lane 1 = scalar, so both lanes now hold it
    763   %sum = fadd <2 x double> %splat, %i ; the CHECK line expects the splat as a (%rdi){1to2} operand
    764   ret <2 x double> %sum
    765 }
    766 
    767 ; CHECK-LABEL: test_mask_broadcast_vaddpd_128
    768 ; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
    769 ; CHECK: ret
    770 define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
    771                                           double* %j, <2 x i64> %mask1)
    772                                           nounwind { ; adds the scalar at %j to both lanes of %i, merged into %dst under %mask1
    773   %mask = icmp ne <2 x i64> %mask1, zeroinitializer ; a lane is enabled when its mask element is non-zero
    774   %tmp = load double, double* %j ; single double that is replicated below
    775   %j.0 = insertelement <2 x double> undef, double %tmp, i64 0 ; lane 0 = scalar
    776   %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1 ; lane 1 = scalar, so both lanes now hold it
    777   %x = fadd <2 x double> %j.1, %i ; the CHECK line expects the splat as a (%rdi){1to2} operand
    778   %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst ; disabled lanes keep %dst, as in the other test_mask_* functions (was %i, which left %dst unused)
    779   ret <2 x double> %r
    780 }
    781 
    782 ; CHECK-LABEL: test_maskz_broadcast_vaddpd_128
    783 ; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
    784 ; CHECK: ret
    785 define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
    786                                            <2 x i64> %mask1) nounwind { ; adds the scalar at %j to both lanes of %i, zeroing the lanes disabled by %mask1
    787   %lanes = icmp ne <2 x i64> %mask1, zeroinitializer ; a lane is enabled when its mask element is non-zero
    788   %scalar = load double, double* %j ; single double that is replicated below
    789   %splat.lo = insertelement <2 x double> undef, double %scalar, i64 0 ; lane 0 = scalar
    790   %splat = insertelement <2 x double> %splat.lo, double %scalar, i64 1 ; lane 1 = scalar, so both lanes now hold it
    791   %sum = fadd <2 x double> %splat, %i ; the CHECK line expects the splat as a (%rdi){1to2} operand
    792   %zeroed = select <2 x i1> %lanes, <2 x double> %sum, <2 x double> zeroinitializer ; disabled lanes become 0.0
    793   ret <2 x double> %zeroed
    794 }
    795