; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s

; Codegen tests for vector splats (one element replicated into every lane)
; when llc is invoked with -mattr=+avx, as on the command line above.
; Each function pins the exact instruction sequence llc is expected to emit.

; Splat byte 5 of a <32 x i8>. Expected: an in-register vpshufb on xmm0
; followed by vinsertf128 to copy that 128-bit result into the upper half.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; Splat 16-bit element 5 of a <16 x i16>. Expected: vpshufhw + vpshufd on
; xmm0, then vinsertf128 to duplicate the result into the upper half.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; Build a <4 x i64> by inserting the same scalar argument into all four
; elements. Expected: vmovq from %rdi, vpshufd to fill xmm0, vinsertf128.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Same four-insert pattern as funcC, but with a double argument.
; Expected: vmovddup on xmm0 followed by vinsertf128.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; The loaded i32 is inserted only into elements 6 and 7 of the <8 x i32>;
; every other element stays undef. The expected output loads it with a
; single vbroadcastss from a stack slot. Both conditional branches in the
; IR use an undef condition.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK: ## BB#0: ## %for_exit499
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB4_2
; CHECK-NEXT: ## BB#1: ## %load.i1247
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT: retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader: ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499: ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247: ; preds = %for_exit499
  ; Address of element [1][1] of the 18x18 float array, reloaded as an i32.
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; Same element-6/element-7 insert pattern as funcE, but the scalar is an
; i32 argument instead of a loaded value. Expected: vmovd from %edi,
; vpshufd on xmm0, then vinsertf128 (no memory broadcast).
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; Splat element 0 of an <8 x float> held in a register.
; Expected: vpermilps on xmm0 followed by vinsertf128.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Splat element 5 of an <8 x float>, i.e. an element from the upper four
; floats. Expected: vpermilps on the full ymm0, then vperm2f128.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; Load a <2 x double> and splat element 1. Expected: a single vmovddup with
; a memory source. The operand text ahead of the asm comment is matched by
; a regex, so the exact address operand is not pinned by this assertion.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; Load a <4 x double> and splat element 2. Expected: one vbroadcastsd from
; byte offset 16 (element 2 * 8 bytes per double).
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; Load a <4 x float> and splat element 0. Expected: one vbroadcastss
; directly from the pointer into xmm0.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

; Load an <8 x float> and splat element 7. Expected: one vbroadcastss from
; byte offset 28 (element 7 * 4 bytes per float).
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT: retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}