; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
; X32-LABEL: test_broadcast_2f64_4f64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double> *%p
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ret <4 x double> %2
}

define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
; X32-LABEL: test_broadcast_2i64_4i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64> *%p
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ret <4 x i64> %2
}

define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
; X32-LABEL: test_broadcast_4f32_8f32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float> *%p
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 ret <8 x float> %2
}

define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
; X32-LABEL: test_broadcast_4i32_8i32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32> *%p
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 ret <8 x i32> %2
}

define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ret <16 x i16> %2
}

define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ret <32 x i8> %2
}

; The "reuse" variants also store the loaded 128-bit value, so the load should
; stay a plain vmovaps and the 256-bit result should be built with vinsertf128
; from the register rather than re-reading memory with vbroadcastf128.
define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
; X32-LABEL: test_broadcast_2f64_4f64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double>* %p0
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 store <2 x double> %1, <2 x double>* %p1
 ret <4 x double> %2
}

define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
; X32-LABEL: test_broadcast_2i64_4i64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64>* %p0
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 store <2 x i64> %1, <2 x i64>* %p1
 ret <4 x i64> %2
}

define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
; X32-LABEL: test_broadcast_4f32_8f32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float>* %p0
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 store <4 x float> %1, <4 x float>* %p1
 ret <8 x float> %2
}

define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
; X32-LABEL: test_broadcast_4i32_8i32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 store <4 x i32> %1, <4 x i32>* %p1
 ret <8 x i32> %2
}

define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 store <8 x i16> %1, <8 x i16>* %p1
 ret <16 x i16> %2
}

define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p0
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 store <16 x i8> %1, <16 x i8>* %p1
 ret <32 x i8> %2
}

; PR29088: ensure the subvector broadcast is still formed when an unrelated
; zero store is interleaved between the load and the shuffle.
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32-LABEL: PR29088:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovaps %ymm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: PR29088:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovaps %ymm1, (%rsi)
; X64-NEXT:    retq
 %ld = load <4 x i32>, <4 x i32>* %p0
 store <8 x float> zeroinitializer, <8 x float>* %p1
 %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 ret <8 x i32> %shuf
}