; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s

; Splat of byte element 5 across all 32 lanes of a <32 x i8>.
; The assertions expect a 128-bit vpshufb followed by vinsertf128 to fill the
; upper half of the ymm register.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %splat.b5 = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %splat.b5
}

; Splat of 16-bit element 5 across all 16 lanes of a <16 x i16>.
; In the expected vpshufb mask, element 5 shows up as the byte pair 10,11.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %splat.w5 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %splat.w5
}

; Builds a <4 x i64> by inserting the scalar argument %q into every lane.
; The assertions expect the value to be moved into xmm0, duplicated with
; vmovddup, and then widened with vinsertf128.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %lane0 = insertelement <4 x i64> undef, i64 %q, i32 0
  %lane01 = insertelement <4 x i64> %lane0, i64 %q, i32 1
  %lane012 = insertelement <4 x i64> %lane01, i64 %q, i32 2
  %lane0123 = insertelement <4 x i64> %lane012, i64 %q, i32 3
  ret <4 x i64> %lane0123
}

; Floating-point counterpart of funcC: inserts the double argument %q into
; every lane of a <4 x double>. No vmovq is expected here; the sequence starts
; directly with vmovddup on xmm0.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %lane0 = insertelement <4 x double> undef, double %q, i32 0
  %lane01 = insertelement <4 x double> %lane0, double %q, i32 1
  %lane012 = insertelement <4 x double> %lane01, double %q, i32 2
  %lane0123 = insertelement <4 x double> %lane012, double %q, i32 3
  ret <4 x double> %lane0123
}

; Test this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; The IR loads one 32-bit value from a 32-byte-aligned stack array and inserts
; it into lanes 6 and 7 only; all other lanes stay undef. The assertions expect
; a single vbroadcastss from the stack slot.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK: ## BB#0: ## %for_exit499
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB4_2
; CHECK-NEXT: ## BB#1: ## %load.i1247
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT: retq
allocas:
  %grid = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  ; Address of element [1][1] of the stack array, reinterpreted as i32.
  %elt.addr = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %grid, i64 0, i64 1, i64 1
  %elt.addr.i32 = bitcast float* %elt.addr to i32*
  %elt.bits = load i32, i32* %elt.addr.i32, align 4
  %lane6 = insertelement <8 x i32> undef, i32 %elt.bits, i32 6
  %lane67 = insertelement <8 x i32> %lane6, i32 %elt.bits, i32 7
  %as.float = bitcast <8 x i32> %lane67 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %result = phi <8 x float> [ %as.float, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %result
}

; Same lane-6/lane-7 insertion pattern as funcE, but the scalar comes from the
; i32 argument %val instead of a load. The assertions expect a register-based
; sequence: vmovd, vpermilps, vinsertf128.
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
  %lane6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %lane67 = insertelement <8 x i32> %lane6, i32 %val, i32 7
  %as.float = bitcast <8 x i32> %lane67 to <8 x float>
  ret <8 x float> %as.float
}

; Splat of element 0 of an <8 x float>. The element is in the low 128 bits, so
; the expected sequence is vpermilps on xmm0 followed by vinsertf128.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %splat.e0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %splat.e0
}

; Splat of element 5 of an <8 x float>. Element 5 is in the upper 128 bits, so
; the expected sequence first extracts the high half with vextractf128 and then
; splats element 1 of that half.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %splat.e5 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %splat.e5
}

; Loads a <2 x double> and splats element 1. The assertions expect a full
; vector load followed by vmovhlps, not a broadcast.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps (%rdi), %xmm0
; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: retq
  %vec = load <2 x double>, <2 x double>* %ptr
  %splat = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %splat
}

; Loads a <4 x double> and splats element 2. The assertions expect the load and
; shuffle to fold into one vbroadcastsd from byte offset 16.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %ptr
  %splat = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %splat
}

; Loads a <4 x float> and splats element 0. The assertions expect one
; vbroadcastss from the base address into an xmm register.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
  %vec = load <4 x float>, <4 x float>* %ptr
  %splat = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %splat
}

; Loads an <8 x float> and splats element 7. The assertions expect one
; vbroadcastss from byte offset 28 into a ymm register.
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %ptr
  %splat = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %splat
}