; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2 # encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x19,0xc1,0x01]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x58,0xca]
; X86-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x19,0xc0,0x01]
; X86-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm2 # encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x19,0xc1,0x01]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x58,0xca]
; X64-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x19,0xc0,0x01]
; X64-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; X86:       # %bb.0:
; X86-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc2,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1b,0xc1,0x01]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1b,0xc0,0x01]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; X64:       # %bb.0:
; X64-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc2,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1b,0xc1,0x01]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1b,0xc0,0x01]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xd1,0x01]
; X86-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc1,0x01]
; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT:    vaddps %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xd1,0x01]
; X64-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc1,0x01]
; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT:    vaddps %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x18,0xd1,0x01]
; X86-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x18,0xc1,0x01]
; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x18,0xd1,0x01]
; X64-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x18,0xc1,0x01]
; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xd1,0x01]
; X86-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc1,0x01]
; X86-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xd1,0x01]
; X64-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc1,0x01]
; X64-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x38,0xd1,0x01]
; X86-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x38,0xc1,0x01]
; X86-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x38,0xd1,0x01]
; X64-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x38,0xc1,0x01]
; X64-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}


declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16)

define <16 x i32>@test_int_x86_avx512_cvtmask2d_512(i16 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT:    vpmovm2d %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    vpmovm2d %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8)

define <8 x i64>@test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2q_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    vpmovm2q %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x38,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    vpmovm2q %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x38,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
  ret <8 x i64> %res
}

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vaddps %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vaddps %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512_load(<8 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcastf32x8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1b,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf32x8 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1b,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <8 x float>, <8 x float>* %x0ptr
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512_load(<2 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcastf64x2 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x1a,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf64x2 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x1a,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <2 x double>, <2 x double>* %x0ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512_load(<8 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcasti32x8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x5b,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti32x8 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x5b,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <8 x i32>, <8 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, i8)

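; The <2 x i64> broadcast mirrors the f64x2 pattern above: the all-ones-mask
; call lowers to plain vinserti128/vinserti64x4, while the merge- and
; zero-masked calls use vinserti64x4 with {%k1} and {%k1} {z}.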
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512_load(<2 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcasti64x2 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x5a,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti64x2 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x5a,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <2 x i64>, <2 x i64>* %x0ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)

define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovd2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
  ret i16 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovq2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res)
  ret i8 %res1
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
  %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res)
  ret i16 %res1
}