; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c

; Unmasked 128-bit byte broadcast: %a0 is reinterpreted as <16 x i8> and
; element 0 is splatted into all 16 lanes (all-zero shuffle mask), then the
; result is reinterpreted back to <2 x i64>. Both the i386 and the x86_64 run
; expect a single vpbroadcastb.
define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: vpbroadcastb %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: vpbroadcastb %xmm0, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i8> %res0 to <2 x i64>
  ret <2 x i64> %res1
}

; Merge-masked 128-bit byte broadcast: byte 0 of %a2 is splatted to 16 lanes,
; then each bit of the i16 mask %a1 (viewed as <16 x i1>) selects the splat
; when set and the corresponding byte of %a0 when clear. The expected code
; moves the mask into %k1 (loaded from the stack on i386, taken from %edi on
; x86_64) and emits one vpbroadcastb predicated on %k1.
define <2 x i64> @test_mm_mask_broadcastb_epi8(<2 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastb %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastb %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i8> %res0, <16 x i8> %arg0
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

; Zero-masked 128-bit byte broadcast: byte 0 of %a1 is splatted to 16 lanes,
; and lanes whose bit in the i16 mask %a0 is clear are replaced with zero.
; The expected vpbroadcastb carries both the %k1 predicate and the {z}
; modifier.
define <2 x i64> @test_mm_maskz_broadcastb_epi8(i16 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i8> %res0, <16 x i8> zeroinitializer
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

; Unmasked 256-bit byte broadcast: byte 0 of the 128-bit input %a0 is splatted
; into a 32-lane result (the shuffle widens <16 x i8> to <32 x i8>), which is
; returned as <4 x i64>. Expected code is one vpbroadcastb from an xmm source
; into a ymm destination.
define <4 x i64> @test_mm256_broadcastb_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: vpbroadcastb %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: vpbroadcastb %xmm0, %ymm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = bitcast <32 x i8> %res0 to <4 x i64>
  ret <4 x i64> %res1
}

; Merge-masked 256-bit byte broadcast: byte 0 of the 128-bit %a2 is splatted
; to 32 lanes; each bit of the i32 mask %a1 (viewed as <32 x i1>) selects the
; splat when set and the corresponding byte of the 256-bit %a0 when clear.
; The 32-bit mask is expected to reach %k1 through kmovd.
define <4 x i64> @test_mm256_mask_broadcastb_epi8(<4 x i64> %a0, i32 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm256_mask_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastb %xmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastb %xmm1, %ymm0 {%k1}
; X64-NEXT: retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg1, <32 x i8> %res0, <32 x i8> %arg0
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
; Zero-masked 256-bit byte broadcast: byte 0 of the 128-bit %a1 is splatted to
; 32 lanes, and lanes whose bit in the i32 mask %a0 is clear are replaced with
; zero. The expected vpbroadcastb carries both the %k1 predicate and the {z}
; modifier; the mask is moved into %k1 with kmovd.
define <4 x i64> @test_mm256_maskz_broadcastb_epi8(i32 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg0, <32 x i8> %res0, <32 x i8> zeroinitializer
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}

; Unmasked 128-bit word broadcast: %a0 is reinterpreted as <8 x i16> and
; element 0 is splatted into all 8 lanes (all-zero shuffle mask), then the
; result is reinterpreted back to <2 x i64>. Both runs expect a single
; vpbroadcastw.
define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: vpbroadcastw %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: vpbroadcastw %xmm0, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = bitcast <8 x i16> %res0 to <2 x i64>
  ret <2 x i64> %res1
}

; Merge-masked 128-bit word broadcast: word 0 of %a2 is splatted to 8 lanes;
; each bit of the i8 mask %a1 (viewed as <8 x i1>) selects the splat when set
; and the corresponding word of %a0 when clear. On i386 the mask byte is
; loaded from the stack into %al; on x86_64 it is taken from %edi. In both
; cases it is moved into %k1 with kmovw.
define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x i16> %res0, <8 x i16> %arg0
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

; Zero-masked 128-bit word broadcast: word 0 of %a1 is splatted to 8 lanes,
; and lanes whose bit in the i8 mask %a0 is clear are replaced with zero.
; The expected vpbroadcastw carries both the %k1 predicate and the {z}
; modifier.
define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x i16> %res0, <8 x i16> zeroinitializer
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

; Unmasked 256-bit word broadcast: word 0 of the 128-bit input %a0 is splatted
; into a 16-lane result (the shuffle widens <8 x i16> to <16 x i16>), which is
; returned as <4 x i64>. Expected code is one vpbroadcastw from an xmm source
; into a ymm destination.
define <4 x i64> @test_mm256_broadcastw_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: vpbroadcastw %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: vpbroadcastw %xmm0, %ymm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i16> %res0 to <4 x i64>
  ret <4 x i64> %res1
}

; Merge-masked 256-bit word broadcast: word 0 of the 128-bit %a2 is splatted
; to 16 lanes; each bit of the i16 mask %a1 (viewed as <16 x i1>) selects the
; splat when set and the corresponding word of the 256-bit %a0 when clear.
; The mask is expected to reach %k1 through kmovw.
define <4 x i64> @test_mm256_mask_broadcastw_epi16(<4 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm256_mask_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm1, %ymm0 {%k1}
; X64-NEXT: retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i16> %res0, <16 x i16> %arg0
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}

; Zero-masked 256-bit word broadcast: word 0 of the 128-bit %a1 is splatted to
; 16 lanes, and lanes whose bit in the i16 mask %a0 is clear are replaced with
; zero. The expected vpbroadcastw carries both the %k1 predicate and the {z}
; modifier.
define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i16> %res0, <16 x i16> zeroinitializer
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}

; NOTE(review): no instruction visible in this file references !0; confirm it
; is unused before removing it.
!0 = !{i32 1}