; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: xorpd %xmm1, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX: # BB#0:
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}

define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm2, %xmm2
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm2, %xmm2
; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm2, %xmm2
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41: # BB#0:
; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT: xorpd %xmm2, %xmm2
; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX: # BB#0:
; AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; AVX-NEXT: retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}

define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_v2i64_z1:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v2i64_z1:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}

define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm2, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm2, %xmm2
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm2, %xmm2
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_v4i64_01z3:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v4i64_01z3:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}

define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT: retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}

define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm2, %xmm2
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm2, %xmm2
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX: # BB#0:
; AVX-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT: retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}

define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2: # BB#0:
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3: # BB#0:
; SSE3-NEXT: xorl %eax, %eax
; SSE3-NEXT: movd %eax, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorl %eax, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_v4i32_01z3:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v4i32_01z3:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT: retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}

define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm2, %xmm2
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT: xorl %eax, %eax
; SSE3-NEXT: movd %eax, %xmm2
; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm2, %xmm2
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT: xorl %eax, %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_v8i32_z12345z7:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v8i32_z12345z7:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT: retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}

define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2: # BB#0:
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3: # BB#0:
; SSE3-NEXT: xorl %eax, %eax
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
; SSE3-NEXT: pinsrw $6, %eax, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorl %eax, %eax
; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT: retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}

define <16 x i16> @insert_v16i16_z12345z789ABZDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE2: # BB#0:
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
; SSE2-NEXT: pinsrw $7, %eax, %xmm1
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE3: # BB#0:
; SSE3-NEXT: xorl %eax, %eax
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
; SSE3-NEXT: pinsrw $6, %eax, %xmm0
; SSE3-NEXT: pinsrw $7, %eax, %xmm1
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorl %eax, %eax
; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
; SSSE3-NEXT: pinsrw $7, %eax, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}

define <16 x i8> @insert_v16i8_z123456789ABZDEz(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE3: # BB#0:
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: xorl %eax, %eax
; SSE3-NEXT: movd %eax, %xmm2
; SSE3-NEXT: pandn %xmm2, %xmm1
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT: pandn %xmm2, %xmm1
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT: xorl %eax, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE41: # BB#0:
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: pinsrb $0, %eax, %xmm0
; SSE41-NEXT: pinsrb $15, %eax, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v16i8_z123456789ABZDEz:
; AVX: # BB#0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}

define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT: pandn %xmm3, %xmm5
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm4, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3: # BB#0:
; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT: pand %xmm2, %xmm0
; SSE3-NEXT: xorl %eax, %eax
; SSE3-NEXT: movd %eax, %xmm3
; SSE3-NEXT: pandn %xmm3, %xmm2
; SSE3-NEXT: por %xmm2, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT: pand %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm3, %xmm4
; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT: pand %xmm5, %xmm1
; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT: pandn %xmm3, %xmm5
; SSE3-NEXT: por %xmm5, %xmm1
; SSE3-NEXT: pand %xmm2, %xmm1
; SSE3-NEXT: pandn %xmm4, %xmm2
; SSE3-NEXT: por %xmm2, %xmm0
; SSE3-NEXT: por %xmm2, %xmm1
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT: xorl %eax, %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: por %xmm3, %xmm0
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
; SSSE3-NEXT: pshufb %xmm3, %xmm0
; SSSE3-NEXT: movdqa %xmm2, %xmm4
; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSSE3-NEXT: por %xmm4, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
; SSSE3-NEXT: por %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm3, %xmm1
; SSSE3-NEXT: por %xmm4, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41: # BB#0:
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: pinsrb $0, %eax, %xmm0
; SSE41-NEXT: pinsrb $15, %eax, %xmm0
; SSE41-NEXT: pinsrb $14, %eax, %xmm1
; SSE41-NEXT: pinsrb $15, %eax, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX1: # BB#0:
; AVX1-NEXT: xorl %eax, %eax
; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2: # BB#0:
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}