Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
      4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
      5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
      6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s
      7 
; Plain 512-bit FP add: fadd of two <8 x double> args must select a single
; vaddpd on zmm registers (CHECK lines are autogenerated; do not hand-edit).
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}
     17 
; fadd with a vector constant operand: the constant must be folded into
; vaddpd's memory operand (RIP-relative constant-pool load), not loaded first.
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}
     27 
; 512-bit single-precision add: fadd <16 x float> selects a single vaddps.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}
     37 
; Non-splat <16 x float> constant must fold into vaddps's memory operand.
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000,  float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}
     47 
; 512-bit FP subtract: fsub <8 x double> selects a single vsubpd.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}
     57 
; The load of the subtrahend folds into vsubpd's memory operand (%rdi),
; so no separate vector move is emitted.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}
     68 
; 512-bit single-precision subtract: fsub <16 x float> selects a single vsubps.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}
     78 
; Memory-operand folding for the <16 x float> subtract: load folds into vsubps.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}
     89 
; 512-bit i64 element multiply: configs with AVX512DQ (AVX512DQ, SKX) get the
; native vpmullq; the others expand into the three-partial-product
; vpmuludq / shift / vpaddq sequence.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}
    142 
; 256-bit i64 element multiply: only SKX (DQ+VL) can use vpmullq on ymm;
; AVX512DQ without VL cannot use the 256-bit form, so it expands like the
; non-DQ configs into the vpmuludq / shift / vpaddq sequence.
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512F-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512F-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512VL-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512BW-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}
    203 
; 128-bit i64 element multiply: same split as the 256-bit case — only SKX
; (DQ+VL) selects vpmullq on xmm; every other config, including AVX512DQ
; without VL, uses the expanded vpmuludq / shift / vpaddq sequence.
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512F-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512VL-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512BW-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512DQ-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512DQ-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512DQ-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512DQ-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}
    264 
; 512-bit FP multiply: fmul <8 x double> selects a single vmulpd.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}
    274 
; Constant operand folds into vmulpd's RIP-relative memory operand.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}
    284 
; 512-bit single-precision multiply: fmul <16 x float> selects a single vmulps.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}
    294 
; Constant operand folds into vmulps's RIP-relative memory operand.
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}
    304 
; 512-bit FP divide: fdiv <8 x double> selects a single vdivpd.
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}
    314 
; Constant divisor folds into vdivpd's RIP-relative memory operand.
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}
    324 
; 512-bit single-precision divide: fdiv <16 x float> selects a single vdivps.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}
    334 
; Non-splat constant divisor folds into vdivps's RIP-relative memory operand.
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}
    344 
; Integer <8 x i64> add: expect a single vpaddq on zmm registers.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}
    353 
; The vector load folds into vpaddq's memory operand (%rdi).
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}
    363 
; Splat constant <1 x 8> is matched as an embedded-broadcast memory operand
; ({1to8}) rather than materializing the full 512-bit constant.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}
    372 
; A scalar load splatted through a chain of insertelements must be recognized
; as the (%rdi){1to8} embedded-broadcast form of vpaddq.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}
    390 
; Integer <16 x i32> add: expect a single vpaddd on zmm registers.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}
    399 
; The vector load folds into vpaddd's memory operand (%rdi).
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}
    409 
; Splat i32 constant becomes the {1to16} embedded-broadcast operand of vpaddd.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}
    418 
; icmp-ne-zero produces a k-mask (vpcmpneqd -> %k1); selecting the sum vs. the
; original %i becomes merge-masked vpaddd {%k1} with %zmm0 as pass-through.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
    431 
; Selecting the sum vs. zero becomes zero-masked vpaddd {%k1} {z}.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
    444 
; Memory-operand folding combined with merge masking: vpaddd (%rdi) ... {%k1}.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
    458 
; Embedded broadcast ({1to16}) combined with merge masking ({%k1}).
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
    471 
; Memory-operand folding combined with zero masking ({%k1} {z}).
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
    485 
; Embedded broadcast ({1to16}) combined with zero masking ({%k1} {z}).
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
    498 
; Integer <8 x i64> subtract: expect a single vpsubq.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}
    507 
; Integer <16 x i32> subtract: expect a single vpsubd.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}
    516 
; Integer <16 x i32> multiply: expect a single vpmulld (available on all
; of this file's configs, unlike the 64-bit vpmullq).
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}
    525 
; A readnone call to libm's sqrtf is lowered to a direct vsqrtss — no call.
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}
    536 
; A readnone call to libm's sqrt is lowered to a direct vsqrtsd — no call.
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}
    547 
; The llvm.sqrt.f32 intrinsic lowers to vsqrtss.
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}
    557 
; Vector sqrt intrinsic on <16 x float> lowers to a single 512-bit vsqrtps.
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}
    567 
; Vector sqrt intrinsic on <8 x double> lowers to a single 512-bit vsqrtpd.
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}
    577 
; Splat FP constant becomes the {1to16} embedded-broadcast operand of vaddps.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}
    586 
; Splat i64 constant becomes the {1to8} embedded-broadcast operand of vpaddq.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}
    595 
; Splat i64 constant becomes the {1to8} embedded-broadcast operand of vporq.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}
    604 
; The vector load folds into vpandd's memory operand (%rdi).
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}
    615 
; A scalar load splatted via insertelement + shufflevector is matched as the
; (%rdi){1to8} embedded-broadcast operand of vpandq.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}
    628 
; Merge-masked FP add: the select between the sum and %dst becomes
; vaddps ... {%k1}. Note: the define's parameter list is intentionally split
; around the autogenerated CHECK block (update_llc_test_checks inserts checks
; right after the first line of the signature) — keep this layout.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
    643 
; Merge-masked FP multiply: select between product and %dst -> vmulps {%k1}.
; (Signature split around the autogenerated CHECK block is intentional.)
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
    658 
; The fcmp-olt + select min idiom must be recognized and combined with the
; mask select into a single merge-masked vminps {%k1}.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}
    674 
; Same min idiom with an <8 x i32> mask operand: configs with VL
; (AVX512VL, SKX) compare the mask on ymm directly; configs without VL
; widen it to zmm (hence the implicit-def "kill" note) before vpcmpneqd.
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}
    721 
; Masked v16f32 max: ogt compare + select picks the larger of %i/%j per lane;
; the nonzero lanes of %mask1 gate which results replace %dst. The i32 mask is
; already 512-bit, so all run configurations share one expected sequence.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}
    737 
; Masked v8f64 max: mirror of test_mask_vminpd with an ogt compare, expected
; to fold into vmaxpd {%k1}. As above, targets without AVX512VL widen the
; 256-bit i32 mask compare to zmm.
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}
    784 
; Masked v16f32 subtract: select(%mask1 != 0, %i - %j, %dst) should fold into
; a single vsubps with a {%k1} write-mask.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
    799 
; Masked v16f32 divide: select(%mask1 != 0, %i / %j, %dst) should fold into
; a single vdivps with a {%k1} write-mask.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
    814 
; Masked v8f64 add with an i64 element mask: the 64-bit compare uses
; vpcmpneqq (qword) rather than vpcmpneqd, then folds into vaddpd {%k1}.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}
    829 
; Zero-masked v8f64 add: the select's false operand is zeroinitializer, so
; the fold should use the zeroing form {%k1} {z} instead of merge-masking.
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
    843 
; Merge-masked add with a memory operand: the load of %j should fold into
; the instruction as vaddpd (%rdi), keeping the {%k1} merge into %dst.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double>* %j,  <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}
    859 
; Zero-masked add with a folded memory operand: combines the load fold
; (vaddpd (%rdi)) with the zeroing mask form {%k1} {z}.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
    874 
; Scalar load + splat (insertelement/shufflevector to all lanes) added to a
; vector: should fold into the embedded-broadcast form vaddpd (%rdi){1to8}.
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}
    887 
; Merge-masked embedded-broadcast add: broadcast of the scalar at %j folds
; into vaddpd (%rdi){1to8} under the {%k1} mask.
; NOTE(review): the final select falls back to %i rather than %dst, so %dst
; is dead here (the checks accordingly add into zmm1 and move it to zmm0) --
; presumably intentional for this test, but worth confirming upstream.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}
    906 
; Zero-masked embedded-broadcast add: combines the {1to8} broadcast memory
; operand with the zeroing mask form {%k1} {z}.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                       <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
    924 
; fsub(-0.0, x) is a sign-flip, lowered as an xor with a constant-pool mask:
; integer vpxord on plain AVX512F/VL/BW, the floating-point vxorps form once
; AVX512DQ is available.
define <16 x float>  @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       ## BB#0:
; SKX-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq

  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}
    954 
; 256-bit variant of the sign-flip: a single vxorps on ymm is expected for
; every run configuration.
define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}
    963 
; llvm.fabs on v8f64 lowers to an AND that clears each sign bit: integer
; vpandq without AVX512DQ, the floating-point vandpd form with it.
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
    993 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
    994 
; llvm.fabs on v16f32: same sign-bit-clearing AND as the f64 case, using the
; dword forms -- vpandd without AVX512DQ, vandps with it.
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
   1024 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
   1025