; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Scalar i32 argument splatted to all 16 lanes (insertelement + zero-mask shuffle).
define <16 x i32> @_inreg16xi32(i32 %a) {
; CHECK-LABEL: _inreg16xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %edi, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <16 x i32> undef, i32 %a, i32 0
  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %c
}

; Scalar i64 argument splatted to all 8 lanes.
define <8 x i64> @_inreg8xi64(i64 %a) {
; CHECK-LABEL: _inreg8xi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %c
}

; Element 0 of a <4 x float> argument splatted to 16 lanes by a widening shuffle.
define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
; CHECK-LABEL: _ss16xfloat_v4:
; CHECK: vbroadcastss %xmm0, %zmm0
; CHECK: ret
  %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %b
}

; Scalar float argument splatted to all 16 lanes.
define <16 x float> @_inreg16xfloat(float %a) {
; CHECK-LABEL: _inreg16xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %c
}

; Float splat merged into %i under a mask derived from (%mask1 != 0).
define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_mask:
; CHECK: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK: ret
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
  ret <16 x float> %r
}

; Float splat with masked-off lanes zeroed (select against zeroinitializer).
define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_maskz:
; CHECK: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK: ret
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
  ret <16 x float> %r
}

; Float loaded from memory and splatted; the load is expected to fold into the
; broadcast as a memory operand.
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
; CHECK-LABEL: _ss16xfloat_load:
; CHECK: vbroadcastss (%rdi), %zmm0
; CHECK: ret
  %a = load float, float* %a.ptr
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %c
}

; Loaded float splat merged into %i under a mask.
define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_mask_load:
; CHECK: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK: ret
  %a = load float, float* %a.ptr
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
  ret <16 x float> %r
}

; Loaded float splat with masked-off lanes zeroed.
define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_maskz_load:
; CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK: ret
  %a = load float, float* %a.ptr
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
  ret <16 x float> %r
}

; Scalar double argument splatted to all 8 lanes.
define <8 x double> @_inreg8xdouble(double %a) {
; CHECK-LABEL: _inreg8xdouble:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %c
}

; Double splat merged into %i under a mask derived from (%mask1 != 0).
define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_mask:
; CHECK: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK: ret
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
  ret <8 x double> %r
}

; Double splat with masked-off lanes zeroed.
define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_maskz:
; CHECK: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK: ret
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Double loaded from memory and splatted to all 8 lanes.
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
; CHECK-LABEL: _sd8xdouble_load:
; CHECK: vbroadcastsd (%rdi), %zmm
; CHECK: ret
  %a = load double, double* %a.ptr
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %c
}

; Loaded double splat merged into %i under a mask.
define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_mask_load:
; CHECK: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK: ret
  %a = load double, double* %a.ptr
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
  ret <8 x double> %r
}

; Loaded double splat with masked-off lanes zeroed.
define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_maskz_load:
; CHECK: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK: ret
  %a = load double, double* %a.ptr
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Element 0 of a full-width <16 x i32> argument splatted to all lanes.
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
; CHECK-LABEL: _xmm16xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %b
}

; Element 0 of a full-width <16 x float> argument splatted to all lanes.
define <16 x float> @_xmm16xfloat(<16 x float> %a) {
; CHECK-LABEL: _xmm16xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %b
}

; Select between two sign-extended <16 x i1> values, using an unordered compare
; as the condition; the expected code materialises the result through a
; zero-masked broadcast from a constant-pool operand.
define <16 x i32> @test_vbroadcast() {
; CHECK-LABEL: test_vbroadcast:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vcmpunordps %zmm0, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK-NEXT: knotw %k1, %k1
; CHECK-NEXT: vmovdqu32 %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
entry:
  %0 = sext <16 x i1> zeroinitializer to <16 x i32>
  %1 = fcmp uno <16 x float> undef, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i32>
  %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
  ret <16 x i32> %3
}

; We implement the set1 intrinsics with vector initializers. Verify that the
; IR generated will produce broadcasts at the end.
; All 8 double lanes are filled with %d by an insertelement chain; a single
; broadcast is expected.
; NOTE(review): attribute group #2 is not defined in the visible part of this
; file -- confirm it is declared elsewhere or drop the reference.
define <8 x double> @test_set1_pd(double %d) #2 {
; CHECK-LABEL: test_set1_pd:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
  ret <8 x double> %vecinit7.i
}

; All 8 i64 lanes are filled with %d by an insertelement chain.
define <8 x i64> @test_set1_epi64(i64 %d) #2 {
; CHECK-LABEL: test_set1_epi64:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
  %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
  %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
  %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
  %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
  %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
  %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
  %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
  ret <8 x i64> %vecinit7.i
}

; All 16 float lanes are filled with %f by an insertelement chain.
define <16 x float> @test_set1_ps(float %f) #2 {
; CHECK-LABEL: test_set1_ps:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
  ret <16 x float> %vecinit15.i
}

; All 16 i32 lanes are filled with %f by an insertelement chain.
define <16 x i32> @test_set1_epi32(i32 %f) #2 {
; CHECK-LABEL: test_set1_epi32:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastd %edi, %zmm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
  %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
  %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
  %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
  %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
  %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
  %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
  %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
  %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
  %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
  %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
  %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
  %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
  %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
  %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
  %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
  ret <16 x i32> %vecinit15.i
}

; We implement the scalar broadcast intrinsics with vector initializers.
; Verify that the IR generated will produce the broadcast at the end.
define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
; CHECK-LABEL: test_mm512_broadcastsd_pd:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: retq
entry:
  ; Element 0 of the 128-bit input is the value replicated into every lane.
  %0 = extractelement <2 x double> %a, i32 0
  %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
  ret <8 x double> %vecinit7.i
}