; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=SSE1

; Codegen tests for <4 x float> shufflevector lowering on x86-64 with SSE2
; disabled (-mattr=-sse2).
;
; Naming convention used by the shuffle_* functions: the suffix spells out the
; shuffle mask, one character per result lane. A digit is the mask index
; (0-3 select a lane of the first operand, 4-7 a lane of the second operand),
; 'z' is a lane taken from a zeroinitializer operand and 'u' is an undef lane.
;
; The assertion lines inside each function are generated by the script named
; in the NOTE line above and should be regenerated with it, not edited by hand.

; --- Single-input shuffles: the mask only references lanes of %a (%b is unused).
; Each is expected to lower to one in-place instruction on xmm0.

define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0001:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0020:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0300:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_1000:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_2200:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_3330:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x float> %shuffle
}

; Full lane reversal.
define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_3210:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %shuffle
}

; Duplicating each lane of the low half is expected to use unpcklps.
define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0011:
; SSE1: # %bb.0:
; SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  ret <4 x float> %shuffle
}

; Duplicating each lane of the high half is expected to use unpckhps.
define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_2233:
; SSE1: # %bb.0:
; SSE1-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  ret <4 x float> %shuffle
}

; Even-lane duplication; the expected output is a plain shufps.
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0022:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}

; Odd-lane duplication; the expected output is a plain shufps.
define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_1133:
; SSE1: # %bb.0:
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

; --- Shuffles that move whole 64-bit halves; expected to use movlhps/movhlps.

; Low half of %a followed by low half of %b.
define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0145:
; SSE1: # %bb.0:
; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %shuffle
}

; Low half of %a repeated twice.
define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0101:
; SSE1: # %bb.0:
; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %shuffle
}

; High half of %a repeated twice.
define <4 x float> @shuffle_v4f32_2323(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_2323:
; SSE1: # %bb.0:
; SSE1-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; High half of %b followed by high half of %a.
define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_6723:
; SSE1: # %bb.0:
; SSE1-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; --- Shuffles of a zero vector (first operand) with %a (second operand):
; one lane of %a is placed into an otherwise zero (or undef) result.
; The expected output materializes the zero vector with xorps.

; %a[0] into lane 0, remaining lanes zero.
define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_4zzz:
; SSE1: # %bb.0:
; SSE1-NEXT: xorps %xmm1, %xmm1
; SSE1-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; %a[0] into lane 1; the zero lanes are picked via mask indices 2, 3 and 0.
define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_z4zz:
; SSE1: # %bb.0:
; SSE1-NEXT: xorps %xmm1, %xmm1
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
  ret <4 x float> %shuffle
}

; %a[0] into lane 2, remaining lanes zero.
define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_zz4z:
; SSE1: # %bb.0:
; SSE1-NEXT: xorps %xmm1, %xmm1
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
  ret <4 x float> %shuffle
}

; Zero in lane 0, %a[0] in lane 3, lanes 1 and 2 undef; the expected output
; needs only a single shufps after the xorps.
define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_zuu4:
; SSE1: # %bb.0:
; SSE1-NEXT: xorps %xmm1, %xmm1
; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
  ret <4 x float> %shuffle
}

; %a[3] into lane 3, remaining lanes zero.
define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_zzz7:
; SSE1: # %bb.0:
; SSE1-NEXT: xorps %xmm1, %xmm1
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x float> %shuffle
}

; %a[2] into lane 1, remaining lanes zero.
define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_z6zz:
; SSE1: # %bb.0:
; SSE1-NEXT: xorps %xmm1, %xmm1
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE1-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; --- Scalar inserted into lane 0 with the upper three lanes zeroed.

; Scalar arrives in a register.
define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
; SSE1-LABEL: insert_reg_and_zero_v4f32:
; SSE1: # %bb.0:
; SSE1-NEXT: xorps %xmm1, %xmm1
; SSE1-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
  %v = insertelement <4 x float> undef, float %a, i32 0
  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuffle
}

; Scalar is loaded from memory; the expected output is a single movss load.
define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
; SSE1-LABEL: insert_mem_and_zero_v4f32:
; SSE1: # %bb.0:
; SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE1-NEXT: retq
  %a = load float, float* %ptr
  %v = insertelement <4 x float> undef, float %a, i32 0
  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuffle
}

; --- <2 x float> loaded from memory and merged into one half of %b.
; The expected output for both functions goes through a 64-bit integer load
; (movq into %rax), two 32-bit stack stores and two movss loads before the
; two floats are recombined with unpcklps.

; Loaded pair replaces the low half of %b; the high half of %b is kept.
define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE1-LABEL: insert_mem_lo_v4f32:
; SSE1: # %bb.0:
; SSE1-NEXT: movq (%rdi), %rax
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT: shrq $32, %rax
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
  %a = load <2 x float>, <2 x float>* %ptr
  ; Widen the loaded pair to four lanes; the upper two lanes are undef.
  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %shuffle
}

; Low half of %b is kept in the low half; the loaded pair becomes the high half.
define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE1-LABEL: insert_mem_hi_v4f32:
; SSE1: # %bb.0:
; SSE1-NEXT: movq (%rdi), %rax
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT: shrq $32, %rax
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE1-NEXT: retq
  %a = load <2 x float>, <2 x float>* %ptr
  ; Widen the loaded pair to four lanes; the upper two lanes are undef.
  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x float> %shuffle
}

; --- Shuffles where one operand is a full <4 x float> load from memory.

; Load with no explicit alignment, then reverse the lanes; the expected output
; is a movaps load followed by shufps.
define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
; SSE1-LABEL: shuffle_mem_v4f32_3210:
; SSE1: # %bb.0:
; SSE1-NEXT: movaps (%rdi), %xmm0
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE1-NEXT: retq
  %a = load <4 x float>, <4 x float>* %ptr
  %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %shuffle
}

; Low half of %a plus low half of an align-1 load; the expected output uses
; movhps with a memory operand.
define <4 x float> @shuffle_mem_v4f32_0145(<4 x float> %a, <4 x float>* %pb) {
; SSE1-LABEL: shuffle_mem_v4f32_0145:
; SSE1: # %bb.0:
; SSE1-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE1-NEXT: retq
  %b = load <4 x float>, <4 x float>* %pb, align 1
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %shuffle
}

; High half of an align-16 load plus high half of %a; the expected output uses
; movlps with a memory operand.
define <4 x float> @shuffle_mem_v4f32_6723(<4 x float> %a, <4 x float>* %pb) {
; SSE1-LABEL: shuffle_mem_v4f32_6723:
; SSE1: # %bb.0:
; SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE1-NEXT: retq
  %b = load <4 x float>, <4 x float>* %pb, align 16
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; Low half of an align-1 load plus high half of %a; the expected output uses
; movlps with a memory operand.
define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
; SSE1-LABEL: shuffle_mem_v4f32_4523:
; SSE1: # %bb.0:
; SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE1-NEXT: retq
  %b = load <4 x float>, <4 x float>* %pb, align 1
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %shuffle
}