; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-32 --check-prefix=SSE2-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-64 --check-prefix=SSE2-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE-32 --check-prefix=SSE41-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE-64 --check-prefix=SSE41-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-32 --check-prefix=AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64 --check-prefix=AVX2-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX-64 --check-prefix=AVX2-64

; Each test builds a 128-bit vector from individual scalar arguments via a
; chain of insertelement instructions, and checks the code generated for
; SSE2, SSE4.1, AVX1 and AVX2 on both i686 and x86_64.
; Recurring pattern in the checks below: on 32-bit targets all the scalar
; arguments already sit contiguously on the stack, so the whole vector is
; materialized with a single (v)movups load; on 64-bit targets the arguments
; arrive in registers and are assembled with unpck/insert instructions.

; Two doubles arrive in %xmm0/%xmm1 on x86_64 and are merged with a single
; (v)movlhps; on i686 they are loaded off the stack in one go.
define <2 x double> @test_buildvector_v2f64(double %a0, double %a1) {
; SSE-32-LABEL: test_buildvector_v2f64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v2f64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v2f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v2f64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT:    retq
  %ins0 = insertelement <2 x double> undef, double %a0, i32 0
  %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
  ret <2 x double> %ins1
}

; Four floats: SSE2 needs an unpcklps/movlhps tree, while SSE4.1/AVX can use
; three insertps instructions into %xmm0.
define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, float %a3) {
; SSE-32-LABEL: test_buildvector_v4f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v4f32:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT:    retq
;
; SSE41-64-LABEL: test_buildvector_v4f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v4f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v4f32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; AVX-64-NEXT:    retq
  %ins0 = insertelement <4 x float> undef, float %a0, i32 0
  %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1
  %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2
  %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3
  ret <4 x float> %ins3
}

; Two i64s: on x86_64 the GPR arguments (%rdi/%rsi) are moved to XMM and
; joined with punpcklqdq.
define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
; SSE-32-LABEL: test_buildvector_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq %rsi, %xmm1
; SSE-64-NEXT:    movq %rdi, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v2i64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v2i64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovq %rsi, %xmm0
; AVX-64-NEXT:    vmovq %rdi, %xmm1
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
  %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0
  %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1
  ret <2 x i64> %ins1
}

; Four i32s: SSE2 builds the vector with a movd + punpckldq/punpcklqdq tree;
; SSE4.1/AVX use pinsrd directly from the argument GPRs.
define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) {
; SSE-32-LABEL: test_buildvector_v4i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v4i32:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    movd %ecx, %xmm0
; SSE2-64-NEXT:    movd %edx, %xmm1
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-64-NEXT:    movd %esi, %xmm2
; SSE2-64-NEXT:    movd %edi, %xmm0
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-64-NEXT:    retq
;
; SSE41-64-LABEL: test_buildvector_v4i32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movd %edi, %xmm0
; SSE41-64-NEXT:    pinsrd $1, %esi, %xmm0
; SSE41-64-NEXT:    pinsrd $2, %edx, %xmm0
; SSE41-64-NEXT:    pinsrd $3, %ecx, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v4i32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v4i32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovd %edi, %xmm0
; AVX-64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX-64-NEXT:    retq
  %ins0 = insertelement <4 x i32> undef, i32 %f0, i32 0
  %ins1 = insertelement <4 x i32> %ins0, i32 %f1, i32 1
  %ins2 = insertelement <4 x i32> %ins1, i32 %f2, i32 2
  %ins3 = insertelement <4 x i32> %ins2, i32 %f3, i32 3
  ret <4 x i32> %ins3
}

; Eight i16s: SSE2 interleaves pairs with punpcklwd and merges with
; punpckldq/punpcklqdq; SSE4.1/AVX use a pinsrw chain (first six args in
; registers on x86_64, the last two from the stack).
define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
; SSE2-32-LABEL: test_buildvector_v8i16:
; SSE2-32:       # %bb.0:
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v8i16:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-64-NEXT:    movd %r9d, %xmm0
; SSE2-64-NEXT:    movd %r8d, %xmm2
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-64-NEXT:    movd %ecx, %xmm0
; SSE2-64-NEXT:    movd %edx, %xmm1
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-64-NEXT:    movd %esi, %xmm3
; SSE2-64-NEXT:    movd %edi, %xmm0
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT:    retq
;
; SSE41-32-LABEL: test_buildvector_v8i16:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    pinsrw $1, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $2, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $3, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $4, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $6, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $7, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: test_buildvector_v8i16:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movd %edi, %xmm0
; SSE41-64-NEXT:    pinsrw $1, %esi, %xmm0
; SSE41-64-NEXT:    pinsrw $2, %edx, %xmm0
; SSE41-64-NEXT:    pinsrw $3, %ecx, %xmm0
; SSE41-64-NEXT:    pinsrw $4, %r8d, %xmm0
; SSE41-64-NEXT:    pinsrw $5, %r9d, %xmm0
; SSE41-64-NEXT:    pinsrw $6, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrw $7, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v8i16:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrw $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $6, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $7, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v8i16:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovd %edi, %xmm0
; AVX-64-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    retq
  %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
  %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
  %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
  %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
  %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
  %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
  %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
  %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
  ret <8 x i16> %ins7
}

; Sixteen i8s: the SSE2 lowering is a full punpcklbw/punpcklwd/punpckldq/
; punpcklqdq merge tree; SSE4.1/AVX use a 15-step pinsrb chain.
define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
; SSE2-32-LABEL: test_buildvector_v16i8:
; SSE2-32:       # %bb.0:
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v16i8:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd %r9d, %xmm0
; SSE2-64-NEXT:    movd %r8d, %xmm2
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-64-NEXT:    movd %ecx, %xmm0
; SSE2-64-NEXT:    movd %edx, %xmm1
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd %esi, %xmm4
; SSE2-64-NEXT:    movd %edi, %xmm0
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-64-NEXT:    retq
;
; SSE41-32-LABEL: test_buildvector_v16i8:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $3, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $5, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $6, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $7, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $9, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $10, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $11, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $12, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $13, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $14, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $15, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: test_buildvector_v16i8:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movd %edi, %xmm0
; SSE41-64-NEXT:    pinsrb $1, %esi, %xmm0
; SSE41-64-NEXT:    pinsrb $2, %edx, %xmm0
; SSE41-64-NEXT:    pinsrb $3, %ecx, %xmm0
; SSE41-64-NEXT:    pinsrb $4, %r8d, %xmm0
; SSE41-64-NEXT:    pinsrb $5, %r9d, %xmm0
; SSE41-64-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v16i8:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $6, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $7, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $9, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $10, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $11, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $12, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $13, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $14, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $15, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v16i8:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovd %edi, %xmm0
; AVX-64-NEXT:    vpinsrb $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $4, %r8d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    retq
  %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0
  %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1
  %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2
  %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3
  %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4
  %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5
  %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
  %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7
  %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
  %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9
  %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10
  %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
  %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
  %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
  %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
  %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
  ret <16 x i8> %ins15
}

; PR30780
; Splat of a sign-extended i8: the extension is done once in a GPR (movsbl),
; then broadcast — pshufd on SSE/AVX1, vpbroadcastd on AVX2.
define <4 x i32> @test_buildvector_v4i32_splat_sext_i8(i8 %in) {
; SSE-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm0
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movsbl %dil, %eax
; SSE-64-NEXT:    movd %eax, %xmm0
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-64-NEXT:    retq
;
; AVX1-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; AVX1-32-NEXT:    vmovd %eax, %xmm0
; AVX1-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    movsbl %dil, %eax
; AVX1-64-NEXT:    vmovd %eax, %xmm0
; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-64-NEXT:    retq
;
; AVX2-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX2-32:       # %bb.0:
; AVX2-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; AVX2-32-NEXT:    vmovd %eax, %xmm0
; AVX2-32-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-32-NEXT:    retl
;
; AVX2-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    movsbl %dil, %eax
; AVX2-64-NEXT:    vmovd %eax, %xmm0
; AVX2-64-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-64-NEXT:    retq
  %ext = sext i8 %in to i32
  %insert = insertelement <4 x i32> undef, i32 %ext, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

; Same as above with a zero-extended i8 (movzbl instead of movsbl).
define <4 x i32> @test_buildvector_v4i32_splat_zext_i8(i8 %in) {
; SSE-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm0
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movzbl %dil, %eax
; SSE-64-NEXT:    movd %eax, %xmm0
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-64-NEXT:    retq
;
; AVX1-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; AVX1-32-NEXT:    vmovd %eax, %xmm0
; AVX1-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    movzbl %dil, %eax
; AVX1-64-NEXT:    vmovd %eax, %xmm0
; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-64-NEXT:    retq
;
; AVX2-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX2-32:       # %bb.0:
; AVX2-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; AVX2-32-NEXT:    vmovd %eax, %xmm0
; AVX2-32-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-32-NEXT:    retl
;
; AVX2-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    movzbl %dil, %eax
; AVX2-64-NEXT:    vmovd %eax, %xmm0
; AVX2-64-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-64-NEXT:    retq
  %ext = zext i8 %in to i32
  %insert = insertelement <4 x i32> undef, i32 %ext, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}