1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE 3 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 4 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 5 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE 6 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 7 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 8 9 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c 10 11 define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 12 ; SSE-LABEL: test_mm_add_ps: 13 ; SSE: # %bb.0: 14 ; SSE-NEXT: addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1] 15 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 16 ; 17 ; AVX1-LABEL: test_mm_add_ps: 18 ; AVX1: # %bb.0: 19 ; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1] 20 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 21 ; 22 ; AVX512-LABEL: test_mm_add_ps: 23 ; AVX512: # %bb.0: 24 ; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] 25 ; AVX512-NEXT: ret{{[l|q]}} # encoding: 
[0xc3] 26 %res = fadd <4 x float> %a0, %a1 27 ret <4 x float> %res 28 } 29 30 define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 31 ; SSE-LABEL: test_mm_add_ss: 32 ; SSE: # %bb.0: 33 ; SSE-NEXT: addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1] 34 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 35 ; 36 ; AVX1-LABEL: test_mm_add_ss: 37 ; AVX1: # %bb.0: 38 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1] 39 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 40 ; 41 ; AVX512-LABEL: test_mm_add_ss: 42 ; AVX512: # %bb.0: 43 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1] 44 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 45 %ext0 = extractelement <4 x float> %a0, i32 0 46 %ext1 = extractelement <4 x float> %a1, i32 0 47 %fadd = fadd float %ext0, %ext1 48 %res = insertelement <4 x float> %a0, float %fadd, i32 0 49 ret <4 x float> %res 50 } 51 52 define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 53 ; SSE-LABEL: test_mm_and_ps: 54 ; SSE: # %bb.0: 55 ; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1] 56 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 57 ; 58 ; AVX1-LABEL: test_mm_and_ps: 59 ; AVX1: # %bb.0: 60 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1] 61 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 62 ; 63 ; AVX512-LABEL: test_mm_and_ps: 64 ; AVX512: # %bb.0: 65 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1] 66 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 67 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 68 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 69 %res = and <4 x i32> %arg0, %arg1 70 %bc = bitcast <4 x i32> %res to <4 x float> 71 ret <4 x float> %bc 72 } 73 74 define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 75 ; SSE-LABEL: test_mm_andnot_ps: 76 ; SSE: # %bb.0: 77 ; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: 
[0x0f,0x55,0xc1] 78 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 79 ; 80 ; AVX1-LABEL: test_mm_andnot_ps: 81 ; AVX1: # %bb.0: 82 ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1] 83 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 84 ; 85 ; AVX512-LABEL: test_mm_andnot_ps: 86 ; AVX512: # %bb.0: 87 ; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1] 88 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 89 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 90 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 91 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1> 92 %res = and <4 x i32> %not, %arg1 93 %bc = bitcast <4 x i32> %res to <4 x float> 94 ret <4 x float> %bc 95 } 96 97 define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 98 ; SSE-LABEL: test_mm_cmpeq_ps: 99 ; SSE: # %bb.0: 100 ; SSE-NEXT: cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00] 101 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 102 ; 103 ; AVX1-LABEL: test_mm_cmpeq_ps: 104 ; AVX1: # %bb.0: 105 ; AVX1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00] 106 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 107 ; 108 ; AVX512-LABEL: test_mm_cmpeq_ps: 109 ; AVX512: # %bb.0: 110 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00] 111 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 112 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 113 %cmp = fcmp oeq <4 x float> %a0, %a1 114 %sext = sext <4 x i1> %cmp to <4 x i32> 115 %res = bitcast <4 x i32> %sext to <4 x float> 116 ret <4 x float> %res 117 } 118 119 define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 120 ; SSE-LABEL: test_mm_cmpeq_ss: 121 ; SSE: # %bb.0: 122 ; SSE-NEXT: cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00] 123 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 124 ; 125 ; AVX-LABEL: test_mm_cmpeq_ss: 126 ; AVX: # %bb.0: 127 ; 
AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00] 128 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 129 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0) 130 ret <4 x float> %res 131 } 132 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 133 134 define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 135 ; SSE-LABEL: test_mm_cmpge_ps: 136 ; SSE: # %bb.0: 137 ; SSE-NEXT: cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02] 138 ; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 139 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 140 ; 141 ; AVX1-LABEL: test_mm_cmpge_ps: 142 ; AVX1: # %bb.0: 143 ; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02] 144 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 145 ; 146 ; AVX512-LABEL: test_mm_cmpge_ps: 147 ; AVX512: # %bb.0: 148 ; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02] 149 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 150 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 151 %cmp = fcmp ole <4 x float> %a1, %a0 152 %sext = sext <4 x i1> %cmp to <4 x i32> 153 %res = bitcast <4 x i32> %sext to <4 x float> 154 ret <4 x float> %res 155 } 156 157 define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 158 ; SSE-LABEL: test_mm_cmpge_ss: 159 ; SSE: # %bb.0: 160 ; SSE-NEXT: cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02] 161 ; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 162 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 163 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 164 ; 165 ; AVX-LABEL: test_mm_cmpge_ss: 166 ; AVX: # %bb.0: 167 ; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02] 168 ; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 169 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 170 ; 
AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 171 %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2) 172 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 173 ret <4 x float> %res 174 } 175 176 define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 177 ; SSE-LABEL: test_mm_cmpgt_ps: 178 ; SSE: # %bb.0: 179 ; SSE-NEXT: cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01] 180 ; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 181 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 182 ; 183 ; AVX1-LABEL: test_mm_cmpgt_ps: 184 ; AVX1: # %bb.0: 185 ; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01] 186 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 187 ; 188 ; AVX512-LABEL: test_mm_cmpgt_ps: 189 ; AVX512: # %bb.0: 190 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01] 191 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 192 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 193 %cmp = fcmp olt <4 x float> %a1, %a0 194 %sext = sext <4 x i1> %cmp to <4 x i32> 195 %res = bitcast <4 x i32> %sext to <4 x float> 196 ret <4 x float> %res 197 } 198 199 define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 200 ; SSE-LABEL: test_mm_cmpgt_ss: 201 ; SSE: # %bb.0: 202 ; SSE-NEXT: cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01] 203 ; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 204 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 205 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 206 ; 207 ; AVX-LABEL: test_mm_cmpgt_ss: 208 ; AVX: # %bb.0: 209 ; AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01] 210 ; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 211 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 212 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 213 %cmp = call <4 x float> 
@llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1) 214 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 215 ret <4 x float> %res 216 } 217 218 define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 219 ; SSE-LABEL: test_mm_cmple_ps: 220 ; SSE: # %bb.0: 221 ; SSE-NEXT: cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02] 222 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 223 ; 224 ; AVX1-LABEL: test_mm_cmple_ps: 225 ; AVX1: # %bb.0: 226 ; AVX1-NEXT: vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02] 227 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 228 ; 229 ; AVX512-LABEL: test_mm_cmple_ps: 230 ; AVX512: # %bb.0: 231 ; AVX512-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] 232 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 233 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 234 %cmp = fcmp ole <4 x float> %a0, %a1 235 %sext = sext <4 x i1> %cmp to <4 x i32> 236 %res = bitcast <4 x i32> %sext to <4 x float> 237 ret <4 x float> %res 238 } 239 240 define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 241 ; SSE-LABEL: test_mm_cmple_ss: 242 ; SSE: # %bb.0: 243 ; SSE-NEXT: cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02] 244 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 245 ; 246 ; AVX-LABEL: test_mm_cmple_ss: 247 ; AVX: # %bb.0: 248 ; AVX-NEXT: vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02] 249 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 250 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2) 251 ret <4 x float> %res 252 } 253 254 define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 255 ; SSE-LABEL: test_mm_cmplt_ps: 256 ; SSE: # %bb.0: 257 ; SSE-NEXT: cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01] 258 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 259 ; 260 ; AVX1-LABEL: 
test_mm_cmplt_ps: 261 ; AVX1: # %bb.0: 262 ; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01] 263 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 264 ; 265 ; AVX512-LABEL: test_mm_cmplt_ps: 266 ; AVX512: # %bb.0: 267 ; AVX512-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01] 268 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 269 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 270 %cmp = fcmp olt <4 x float> %a0, %a1 271 %sext = sext <4 x i1> %cmp to <4 x i32> 272 %res = bitcast <4 x i32> %sext to <4 x float> 273 ret <4 x float> %res 274 } 275 276 define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 277 ; SSE-LABEL: test_mm_cmplt_ss: 278 ; SSE: # %bb.0: 279 ; SSE-NEXT: cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01] 280 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 281 ; 282 ; AVX-LABEL: test_mm_cmplt_ss: 283 ; AVX: # %bb.0: 284 ; AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01] 285 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 286 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1) 287 ret <4 x float> %res 288 } 289 290 define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 291 ; SSE-LABEL: test_mm_cmpneq_ps: 292 ; SSE: # %bb.0: 293 ; SSE-NEXT: cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04] 294 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 295 ; 296 ; AVX1-LABEL: test_mm_cmpneq_ps: 297 ; AVX1: # %bb.0: 298 ; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04] 299 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 300 ; 301 ; AVX512-LABEL: test_mm_cmpneq_ps: 302 ; AVX512: # %bb.0: 303 ; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04] 304 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 305 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 306 %cmp = fcmp 
une <4 x float> %a0, %a1 307 %sext = sext <4 x i1> %cmp to <4 x i32> 308 %res = bitcast <4 x i32> %sext to <4 x float> 309 ret <4 x float> %res 310 } 311 312 define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 313 ; SSE-LABEL: test_mm_cmpneq_ss: 314 ; SSE: # %bb.0: 315 ; SSE-NEXT: cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04] 316 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 317 ; 318 ; AVX-LABEL: test_mm_cmpneq_ss: 319 ; AVX: # %bb.0: 320 ; AVX-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04] 321 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 322 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4) 323 ret <4 x float> %res 324 } 325 326 define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 327 ; SSE-LABEL: test_mm_cmpnge_ps: 328 ; SSE: # %bb.0: 329 ; SSE-NEXT: cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06] 330 ; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 331 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 332 ; 333 ; AVX1-LABEL: test_mm_cmpnge_ps: 334 ; AVX1: # %bb.0: 335 ; AVX1-NEXT: vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06] 336 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 337 ; 338 ; AVX512-LABEL: test_mm_cmpnge_ps: 339 ; AVX512: # %bb.0: 340 ; AVX512-NEXT: vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06] 341 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 342 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 343 %cmp = fcmp ugt <4 x float> %a1, %a0 344 %sext = sext <4 x i1> %cmp to <4 x i32> 345 %res = bitcast <4 x i32> %sext to <4 x float> 346 ret <4 x float> %res 347 } 348 349 define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 350 ; SSE-LABEL: test_mm_cmpnge_ss: 351 ; SSE: # %bb.0: 352 ; SSE-NEXT: cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06] 353 ; SSE-NEXT: movss %xmm1, %xmm0 # 
encoding: [0xf3,0x0f,0x10,0xc1] 354 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 355 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 356 ; 357 ; AVX-LABEL: test_mm_cmpnge_ss: 358 ; AVX: # %bb.0: 359 ; AVX-NEXT: vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06] 360 ; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 361 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 362 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 363 %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6) 364 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 365 ret <4 x float> %res 366 } 367 368 define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 369 ; SSE-LABEL: test_mm_cmpngt_ps: 370 ; SSE: # %bb.0: 371 ; SSE-NEXT: cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05] 372 ; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 373 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 374 ; 375 ; AVX1-LABEL: test_mm_cmpngt_ps: 376 ; AVX1: # %bb.0: 377 ; AVX1-NEXT: vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05] 378 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 379 ; 380 ; AVX512-LABEL: test_mm_cmpngt_ps: 381 ; AVX512: # %bb.0: 382 ; AVX512-NEXT: vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05] 383 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 384 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 385 %cmp = fcmp uge <4 x float> %a1, %a0 386 %sext = sext <4 x i1> %cmp to <4 x i32> 387 %res = bitcast <4 x i32> %sext to <4 x float> 388 ret <4 x float> %res 389 } 390 391 define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 392 ; SSE-LABEL: test_mm_cmpngt_ss: 393 ; SSE: # %bb.0: 394 ; SSE-NEXT: cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05] 395 ; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 396 ; SSE-NEXT: # xmm0 = 
xmm1[0],xmm0[1,2,3] 397 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 398 ; 399 ; AVX-LABEL: test_mm_cmpngt_ss: 400 ; AVX: # %bb.0: 401 ; AVX-NEXT: vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05] 402 ; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 403 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 404 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 405 %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5) 406 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 407 ret <4 x float> %res 408 } 409 410 define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 411 ; SSE-LABEL: test_mm_cmpnle_ps: 412 ; SSE: # %bb.0: 413 ; SSE-NEXT: cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06] 414 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 415 ; 416 ; AVX1-LABEL: test_mm_cmpnle_ps: 417 ; AVX1: # %bb.0: 418 ; AVX1-NEXT: vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06] 419 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 420 ; 421 ; AVX512-LABEL: test_mm_cmpnle_ps: 422 ; AVX512: # %bb.0: 423 ; AVX512-NEXT: vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06] 424 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 425 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 426 %cmp = fcmp ugt <4 x float> %a0, %a1 427 %sext = sext <4 x i1> %cmp to <4 x i32> 428 %res = bitcast <4 x i32> %sext to <4 x float> 429 ret <4 x float> %res 430 } 431 432 define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 433 ; SSE-LABEL: test_mm_cmpnle_ss: 434 ; SSE: # %bb.0: 435 ; SSE-NEXT: cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06] 436 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 437 ; 438 ; AVX-LABEL: test_mm_cmpnle_ss: 439 ; AVX: # %bb.0: 440 ; AVX-NEXT: vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06] 441 ; AVX-NEXT: ret{{[l|q]}} # 
encoding: [0xc3] 442 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6) 443 ret <4 x float> %res 444 } 445 446 define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 447 ; SSE-LABEL: test_mm_cmpnlt_ps: 448 ; SSE: # %bb.0: 449 ; SSE-NEXT: cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05] 450 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 451 ; 452 ; AVX1-LABEL: test_mm_cmpnlt_ps: 453 ; AVX1: # %bb.0: 454 ; AVX1-NEXT: vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05] 455 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 456 ; 457 ; AVX512-LABEL: test_mm_cmpnlt_ps: 458 ; AVX512: # %bb.0: 459 ; AVX512-NEXT: vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05] 460 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 461 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 462 %cmp = fcmp uge <4 x float> %a0, %a1 463 %sext = sext <4 x i1> %cmp to <4 x i32> 464 %res = bitcast <4 x i32> %sext to <4 x float> 465 ret <4 x float> %res 466 } 467 468 define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 469 ; SSE-LABEL: test_mm_cmpnlt_ss: 470 ; SSE: # %bb.0: 471 ; SSE-NEXT: cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05] 472 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 473 ; 474 ; AVX-LABEL: test_mm_cmpnlt_ss: 475 ; AVX: # %bb.0: 476 ; AVX-NEXT: vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05] 477 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 478 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5) 479 ret <4 x float> %res 480 } 481 482 define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 483 ; SSE-LABEL: test_mm_cmpord_ps: 484 ; SSE: # %bb.0: 485 ; SSE-NEXT: cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07] 486 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 487 ; 488 ; AVX1-LABEL: test_mm_cmpord_ps: 489 ; AVX1: # %bb.0: 490 ; 
AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07] 491 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 492 ; 493 ; AVX512-LABEL: test_mm_cmpord_ps: 494 ; AVX512: # %bb.0: 495 ; AVX512-NEXT: vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07] 496 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 497 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 498 %cmp = fcmp ord <4 x float> %a0, %a1 499 %sext = sext <4 x i1> %cmp to <4 x i32> 500 %res = bitcast <4 x i32> %sext to <4 x float> 501 ret <4 x float> %res 502 } 503 504 define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 505 ; SSE-LABEL: test_mm_cmpord_ss: 506 ; SSE: # %bb.0: 507 ; SSE-NEXT: cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07] 508 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 509 ; 510 ; AVX-LABEL: test_mm_cmpord_ss: 511 ; AVX: # %bb.0: 512 ; AVX-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07] 513 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 514 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) 515 ret <4 x float> %res 516 } 517 518 define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 519 ; SSE-LABEL: test_mm_cmpunord_ps: 520 ; SSE: # %bb.0: 521 ; SSE-NEXT: cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03] 522 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 523 ; 524 ; AVX1-LABEL: test_mm_cmpunord_ps: 525 ; AVX1: # %bb.0: 526 ; AVX1-NEXT: vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03] 527 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 528 ; 529 ; AVX512-LABEL: test_mm_cmpunord_ps: 530 ; AVX512: # %bb.0: 531 ; AVX512-NEXT: vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03] 532 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] 533 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 534 %cmp = fcmp uno <4 x float> %a0, %a1 
535 %sext = sext <4 x i1> %cmp to <4 x i32> 536 %res = bitcast <4 x i32> %sext to <4 x float> 537 ret <4 x float> %res 538 } 539 540 define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 541 ; SSE-LABEL: test_mm_cmpunord_ss: 542 ; SSE: # %bb.0: 543 ; SSE-NEXT: cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03] 544 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 545 ; 546 ; AVX-LABEL: test_mm_cmpunord_ss: 547 ; AVX: # %bb.0: 548 ; AVX-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03] 549 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 550 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3) 551 ret <4 x float> %res 552 } 553 554 define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 555 ; SSE-LABEL: test_mm_comieq_ss: 556 ; SSE: # %bb.0: 557 ; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] 558 ; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 559 ; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 560 ; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 561 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 562 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 563 ; 564 ; AVX1-LABEL: test_mm_comieq_ss: 565 ; AVX1: # %bb.0: 566 ; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] 567 ; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 568 ; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 569 ; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 570 ; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 571 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 572 ; 573 ; AVX512-LABEL: test_mm_comieq_ss: 574 ; AVX512: # %bb.0: 575 ; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] 576 ; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 577 ; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 578 ; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 579 ; AVX512-NEXT: movzbl %cl, %eax # encoding: 
[0x0f,0xb6,0xc1] 580 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 581 %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) 582 ret i32 %res 583 } 584 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 585 586 define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 587 ; SSE-LABEL: test_mm_comige_ss: 588 ; SSE: # %bb.0: 589 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 590 ; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] 591 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 592 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 593 ; 594 ; AVX1-LABEL: test_mm_comige_ss: 595 ; AVX1: # %bb.0: 596 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 597 ; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] 598 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 599 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 600 ; 601 ; AVX512-LABEL: test_mm_comige_ss: 602 ; AVX512: # %bb.0: 603 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 604 ; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] 605 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 606 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 607 %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) 608 ret i32 %res 609 } 610 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone 611 612 define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 613 ; SSE-LABEL: test_mm_comigt_ss: 614 ; SSE: # %bb.0: 615 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 616 ; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] 617 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 618 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 619 ; 620 ; AVX1-LABEL: test_mm_comigt_ss: 621 ; AVX1: # %bb.0: 622 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 623 ; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] 624 ; 
AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 625 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 626 ; 627 ; AVX512-LABEL: test_mm_comigt_ss: 628 ; AVX512: # %bb.0: 629 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 630 ; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] 631 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 632 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 633 %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) 634 ret i32 %res 635 } 636 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone 637 638 define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 639 ; SSE-LABEL: test_mm_comile_ss: 640 ; SSE: # %bb.0: 641 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 642 ; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8] 643 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 644 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 645 ; 646 ; AVX1-LABEL: test_mm_comile_ss: 647 ; AVX1: # %bb.0: 648 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 649 ; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8] 650 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 651 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 652 ; 653 ; AVX512-LABEL: test_mm_comile_ss: 654 ; AVX512: # %bb.0: 655 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 656 ; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] 657 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 658 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 659 %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) 660 ret i32 %res 661 } 662 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone 663 664 define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 665 ; SSE-LABEL: test_mm_comilt_ss: 666 ; SSE: # %bb.0: 667 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 668 ; 
SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8] 669 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 670 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 671 ; 672 ; AVX1-LABEL: test_mm_comilt_ss: 673 ; AVX1: # %bb.0: 674 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 675 ; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8] 676 ; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 677 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 678 ; 679 ; AVX512-LABEL: test_mm_comilt_ss: 680 ; AVX512: # %bb.0: 681 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 682 ; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] 683 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 684 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 685 %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) 686 ret i32 %res 687 } 688 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone 689 690 define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 691 ; SSE-LABEL: test_mm_comineq_ss: 692 ; SSE: # %bb.0: 693 ; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] 694 ; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 695 ; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 696 ; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 697 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 698 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 699 ; 700 ; AVX1-LABEL: test_mm_comineq_ss: 701 ; AVX1: # %bb.0: 702 ; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] 703 ; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 704 ; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 705 ; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 706 ; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 707 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 708 ; 709 ; AVX512-LABEL: test_mm_comineq_ss: 710 ; AVX512: # %bb.0: 711 ; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x2f,0xc1] 712 ; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 713 ; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 714 ; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 715 ; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 716 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 717 %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) 718 ret i32 %res 719 } 720 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone 721 722 define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind { 723 ; SSE-LABEL: test_mm_cvt_ss2si: 724 ; SSE: # %bb.0: 725 ; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0] 726 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 727 ; 728 ; AVX1-LABEL: test_mm_cvt_ss2si: 729 ; AVX1: # %bb.0: 730 ; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0] 731 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 732 ; 733 ; AVX512-LABEL: test_mm_cvt_ss2si: 734 ; AVX512: # %bb.0: 735 ; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] 736 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 737 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 738 ret i32 %res 739 } 740 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 741 742 define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind { 743 ; X86-SSE-LABEL: test_mm_cvtsi32_ss: 744 ; X86-SSE: # %bb.0: 745 ; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04] 746 ; X86-SSE-NEXT: retl # encoding: [0xc3] 747 ; 748 ; X86-AVX1-LABEL: test_mm_cvtsi32_ss: 749 ; X86-AVX1: # %bb.0: 750 ; X86-AVX1-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04] 751 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 752 ; 753 ; X86-AVX512-LABEL: test_mm_cvtsi32_ss: 754 ; X86-AVX512: # %bb.0: 755 ; X86-AVX512-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04] 756 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 757 ; 758 ; X64-SSE-LABEL: test_mm_cvtsi32_ss: 759 ; X64-SSE: # %bb.0: 760 ; X64-SSE-NEXT: cvtsi2ssl %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7] 761 ; X64-SSE-NEXT: retq # encoding: [0xc3] 762 ; 763 ; X64-AVX1-LABEL: test_mm_cvtsi32_ss: 764 ; X64-AVX1: # %bb.0: 765 ; X64-AVX1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7] 766 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 767 ; 768 ; X64-AVX512-LABEL: test_mm_cvtsi32_ss: 769 ; X64-AVX512: # %bb.0: 770 ; X64-AVX512-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7] 771 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 772 %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1) 773 ret <4 x float> %res 774 } 775 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone 776 777 define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind { 778 ; X86-SSE-LABEL: test_mm_cvtss_f32: 779 ; X86-SSE: # %bb.0: 780 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 781 ; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24] 782 ; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 783 ; X86-SSE-NEXT: popl %eax # encoding: [0x58] 784 ; X86-SSE-NEXT: retl # encoding: [0xc3] 785 ; 786 ; X86-AVX1-LABEL: test_mm_cvtss_f32: 787 ; X86-AVX1: # %bb.0: 788 ; X86-AVX1-NEXT: pushl %eax # encoding: [0x50] 789 ; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24] 790 ; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 791 ; X86-AVX1-NEXT: popl %eax # encoding: [0x58] 792 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 793 ; 794 ; X86-AVX512-LABEL: test_mm_cvtss_f32: 795 ; X86-AVX512: # %bb.0: 796 ; X86-AVX512-NEXT: pushl %eax # encoding: [0x50] 797 ; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24] 798 ; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 799 ; 
X86-AVX512-NEXT: popl %eax # encoding: [0x58] 800 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 801 ; 802 ; X64-LABEL: test_mm_cvtss_f32: 803 ; X64: # %bb.0: 804 ; X64-NEXT: retq # encoding: [0xc3] 805 %res = extractelement <4 x float> %a0, i32 0 806 ret float %res 807 } 808 809 define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind { 810 ; SSE-LABEL: test_mm_cvtss_si32: 811 ; SSE: # %bb.0: 812 ; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0] 813 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 814 ; 815 ; AVX1-LABEL: test_mm_cvtss_si32: 816 ; AVX1: # %bb.0: 817 ; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0] 818 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 819 ; 820 ; AVX512-LABEL: test_mm_cvtss_si32: 821 ; AVX512: # %bb.0: 822 ; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] 823 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 824 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 825 ret i32 %res 826 } 827 828 define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind { 829 ; SSE-LABEL: test_mm_cvttss_si: 830 ; SSE: # %bb.0: 831 ; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0] 832 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 833 ; 834 ; AVX1-LABEL: test_mm_cvttss_si: 835 ; AVX1: # %bb.0: 836 ; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] 837 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 838 ; 839 ; AVX512-LABEL: test_mm_cvttss_si: 840 ; AVX512: # %bb.0: 841 ; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] 842 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 843 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 844 ret i32 %res 845 } 846 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 847 848 define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind { 849 ; SSE-LABEL: test_mm_cvttss_si32: 850 ; SSE: # %bb.0: 851 ; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: 
[0xf3,0x0f,0x2c,0xc0] 852 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 853 ; 854 ; AVX1-LABEL: test_mm_cvttss_si32: 855 ; AVX1: # %bb.0: 856 ; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] 857 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 858 ; 859 ; AVX512-LABEL: test_mm_cvttss_si32: 860 ; AVX512: # %bb.0: 861 ; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] 862 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 863 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 864 ret i32 %res 865 } 866 867 define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 868 ; SSE-LABEL: test_mm_div_ps: 869 ; SSE: # %bb.0: 870 ; SSE-NEXT: divps %xmm1, %xmm0 # encoding: [0x0f,0x5e,0xc1] 871 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 872 ; 873 ; AVX1-LABEL: test_mm_div_ps: 874 ; AVX1: # %bb.0: 875 ; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1] 876 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 877 ; 878 ; AVX512-LABEL: test_mm_div_ps: 879 ; AVX512: # %bb.0: 880 ; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] 881 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 882 %res = fdiv <4 x float> %a0, %a1 883 ret <4 x float> %res 884 } 885 886 define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 887 ; SSE-LABEL: test_mm_div_ss: 888 ; SSE: # %bb.0: 889 ; SSE-NEXT: divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1] 890 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 891 ; 892 ; AVX1-LABEL: test_mm_div_ss: 893 ; AVX1: # %bb.0: 894 ; AVX1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1] 895 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 896 ; 897 ; AVX512-LABEL: test_mm_div_ss: 898 ; AVX512: # %bb.0: 899 ; AVX512-NEXT: vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1] 900 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 901 %ext0 = extractelement <4 x float> 
%a0, i32 0 902 %ext1 = extractelement <4 x float> %a1, i32 0 903 %fdiv = fdiv float %ext0, %ext1 904 %res = insertelement <4 x float> %a0, float %fdiv, i32 0 905 ret <4 x float> %res 906 } 907 908 define i32 @test_MM_GET_EXCEPTION_MASK() nounwind { 909 ; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 910 ; X86-SSE: # %bb.0: 911 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 912 ; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 913 ; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 914 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 915 ; X86-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 916 ; X86-SSE-NEXT: # imm = 0x1F80 917 ; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 918 ; X86-SSE-NEXT: retl # encoding: [0xc3] 919 ; 920 ; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 921 ; X86-AVX: # %bb.0: 922 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 923 ; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 924 ; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 925 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 926 ; X86-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 927 ; X86-AVX-NEXT: # imm = 0x1F80 928 ; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 929 ; X86-AVX-NEXT: retl # encoding: [0xc3] 930 ; 931 ; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 932 ; X64-SSE: # %bb.0: 933 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 934 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 935 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 936 ; X64-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 937 ; X64-SSE-NEXT: # imm = 0x1F80 938 ; X64-SSE-NEXT: retq # encoding: [0xc3] 939 ; 940 ; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 941 ; X64-AVX: # %bb.0: 942 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 943 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: 
[0xc5,0xf8,0xae,0x18] 944 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 945 ; X64-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 946 ; X64-AVX-NEXT: # imm = 0x1F80 947 ; X64-AVX-NEXT: retq # encoding: [0xc3] 948 %1 = alloca i32, align 4 949 %2 = bitcast i32* %1 to i8* 950 call void @llvm.x86.sse.stmxcsr(i8* %2) 951 %3 = load i32, i32* %1, align 4 952 %4 = and i32 %3, 8064 953 ret i32 %4 954 } 955 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone 956 957 define i32 @test_MM_GET_EXCEPTION_STATE() nounwind { 958 ; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: 959 ; X86-SSE: # %bb.0: 960 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 961 ; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 962 ; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 963 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 964 ; X86-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 965 ; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 966 ; X86-SSE-NEXT: retl # encoding: [0xc3] 967 ; 968 ; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: 969 ; X86-AVX: # %bb.0: 970 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 971 ; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 972 ; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 973 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 974 ; X86-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 975 ; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 976 ; X86-AVX-NEXT: retl # encoding: [0xc3] 977 ; 978 ; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: 979 ; X64-SSE: # %bb.0: 980 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 981 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 982 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 983 ; X64-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 984 ; X64-SSE-NEXT: retq # encoding: [0xc3] 985 ; 986 ; X64-AVX-LABEL: 
test_MM_GET_EXCEPTION_STATE: 987 ; X64-AVX: # %bb.0: 988 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 989 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 990 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 991 ; X64-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 992 ; X64-AVX-NEXT: retq # encoding: [0xc3] 993 %1 = alloca i32, align 4 994 %2 = bitcast i32* %1 to i8* 995 call void @llvm.x86.sse.stmxcsr(i8* %2) 996 %3 = load i32, i32* %1, align 4 997 %4 = and i32 %3, 63 998 ret i32 %4 999 } 1000 1001 define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind { 1002 ; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1003 ; X86-SSE: # %bb.0: 1004 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1005 ; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1006 ; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1007 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1008 ; X86-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1009 ; X86-SSE-NEXT: # imm = 0x8000 1010 ; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1011 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1012 ; 1013 ; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1014 ; X86-AVX: # %bb.0: 1015 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1016 ; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1017 ; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1018 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1019 ; X86-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1020 ; X86-AVX-NEXT: # imm = 0x8000 1021 ; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1022 ; X86-AVX-NEXT: retl # encoding: [0xc3] 1023 ; 1024 ; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1025 ; X64-SSE: # %bb.0: 1026 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1027 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1028 ; X64-SSE-NEXT: movl 
-{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1029 ; X64-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1030 ; X64-SSE-NEXT: # imm = 0x8000 1031 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1032 ; 1033 ; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1034 ; X64-AVX: # %bb.0: 1035 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1036 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1037 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1038 ; X64-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1039 ; X64-AVX-NEXT: # imm = 0x8000 1040 ; X64-AVX-NEXT: retq # encoding: [0xc3] 1041 %1 = alloca i32, align 4 1042 %2 = bitcast i32* %1 to i8* 1043 call void @llvm.x86.sse.stmxcsr(i8* %2) 1044 %3 = load i32, i32* %1, align 4 1045 %4 = and i32 %3, 32768 1046 ret i32 %4 1047 } 1048 1049 define i32 @test_MM_GET_ROUNDING_MODE() nounwind { 1050 ; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1051 ; X86-SSE: # %bb.0: 1052 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1053 ; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1054 ; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1055 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1056 ; X86-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1057 ; X86-SSE-NEXT: # imm = 0x6000 1058 ; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1059 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1060 ; 1061 ; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1062 ; X86-AVX: # %bb.0: 1063 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1064 ; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1065 ; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1066 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1067 ; X86-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1068 ; X86-AVX-NEXT: # imm = 0x6000 1069 ; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1070 ; 
X86-AVX-NEXT: retl # encoding: [0xc3] 1071 ; 1072 ; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1073 ; X64-SSE: # %bb.0: 1074 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1075 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1076 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1077 ; X64-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1078 ; X64-SSE-NEXT: # imm = 0x6000 1079 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1080 ; 1081 ; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1082 ; X64-AVX: # %bb.0: 1083 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1084 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1085 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1086 ; X64-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1087 ; X64-AVX-NEXT: # imm = 0x6000 1088 ; X64-AVX-NEXT: retq # encoding: [0xc3] 1089 %1 = alloca i32, align 4 1090 %2 = bitcast i32* %1 to i8* 1091 call void @llvm.x86.sse.stmxcsr(i8* %2) 1092 %3 = load i32, i32* %1, align 4 1093 %4 = and i32 %3, 24576 1094 ret i32 %4 1095 } 1096 1097 define i32 @test_mm_getcsr() nounwind { 1098 ; X86-SSE-LABEL: test_mm_getcsr: 1099 ; X86-SSE: # %bb.0: 1100 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1101 ; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1102 ; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1103 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1104 ; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1105 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1106 ; 1107 ; X86-AVX-LABEL: test_mm_getcsr: 1108 ; X86-AVX: # %bb.0: 1109 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1110 ; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1111 ; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1112 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1113 ; X86-AVX-NEXT: popl 
%ecx # encoding: [0x59] 1114 ; X86-AVX-NEXT: retl # encoding: [0xc3] 1115 ; 1116 ; X64-SSE-LABEL: test_mm_getcsr: 1117 ; X64-SSE: # %bb.0: 1118 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1119 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1120 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1121 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1122 ; 1123 ; X64-AVX-LABEL: test_mm_getcsr: 1124 ; X64-AVX: # %bb.0: 1125 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1126 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1127 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1128 ; X64-AVX-NEXT: retq # encoding: [0xc3] 1129 %1 = alloca i32, align 4 1130 %2 = bitcast i32* %1 to i8* 1131 call void @llvm.x86.sse.stmxcsr(i8* %2) 1132 %3 = load i32, i32* %1, align 4 1133 ret i32 %3 1134 } 1135 1136 define <4 x float> @test_mm_load_ps(float* %a0) nounwind { 1137 ; X86-SSE-LABEL: test_mm_load_ps: 1138 ; X86-SSE: # %bb.0: 1139 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1140 ; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1141 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1142 ; 1143 ; X86-AVX1-LABEL: test_mm_load_ps: 1144 ; X86-AVX1: # %bb.0: 1145 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1146 ; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] 1147 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1148 ; 1149 ; X86-AVX512-LABEL: test_mm_load_ps: 1150 ; X86-AVX512: # %bb.0: 1151 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1152 ; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] 1153 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 1154 ; 1155 ; X64-SSE-LABEL: test_mm_load_ps: 1156 ; X64-SSE: # %bb.0: 1157 ; X64-SSE-NEXT: movaps (%rdi), %xmm0 # 
encoding: [0x0f,0x28,0x07] 1158 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1159 ; 1160 ; X64-AVX1-LABEL: test_mm_load_ps: 1161 ; X64-AVX1: # %bb.0: 1162 ; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 1163 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1164 ; 1165 ; X64-AVX512-LABEL: test_mm_load_ps: 1166 ; X64-AVX512: # %bb.0: 1167 ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 1168 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1169 %arg0 = bitcast float* %a0 to <4 x float>* 1170 %res = load <4 x float>, <4 x float>* %arg0, align 16 1171 ret <4 x float> %res 1172 } 1173 1174 define <4 x float> @test_mm_load_ps1(float* %a0) nounwind { 1175 ; X86-SSE-LABEL: test_mm_load_ps1: 1176 ; X86-SSE: # %bb.0: 1177 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1178 ; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1179 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1180 ; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1181 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1182 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1183 ; 1184 ; X86-AVX1-LABEL: test_mm_load_ps1: 1185 ; X86-AVX1: # %bb.0: 1186 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1187 ; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1188 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1189 ; 1190 ; X86-AVX512-LABEL: test_mm_load_ps1: 1191 ; X86-AVX512: # %bb.0: 1192 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1193 ; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1194 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 1195 ; 1196 ; X64-SSE-LABEL: test_mm_load_ps1: 1197 ; X64-SSE: # %bb.0: 1198 ; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1199 ; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1200 ; X64-SSE-NEXT: shufps $0, 
%xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1201 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1202 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1203 ; 1204 ; X64-AVX1-LABEL: test_mm_load_ps1: 1205 ; X64-AVX1: # %bb.0: 1206 ; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1207 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1208 ; 1209 ; X64-AVX512-LABEL: test_mm_load_ps1: 1210 ; X64-AVX512: # %bb.0: 1211 ; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1212 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1213 %ld = load float, float* %a0, align 4 1214 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1215 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1216 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1217 %res3 = insertelement <4 x float> %res2, float %ld, i32 3 1218 ret <4 x float> %res3 1219 } 1220 1221 define <4 x float> @test_mm_load_ss(float* %a0) nounwind { 1222 ; X86-SSE-LABEL: test_mm_load_ss: 1223 ; X86-SSE: # %bb.0: 1224 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1225 ; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1226 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1227 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1228 ; 1229 ; X86-AVX1-LABEL: test_mm_load_ss: 1230 ; X86-AVX1: # %bb.0: 1231 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1232 ; X86-AVX1-NEXT: vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00] 1233 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1234 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1235 ; 1236 ; X86-AVX512-LABEL: test_mm_load_ss: 1237 ; X86-AVX512: # %bb.0: 1238 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1239 ; X86-AVX512-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] 1240 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1241 ; X86-AVX512-NEXT: retl # encoding: 
[0xc3] 1242 ; 1243 ; X64-SSE-LABEL: test_mm_load_ss: 1244 ; X64-SSE: # %bb.0: 1245 ; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1246 ; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1247 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1248 ; 1249 ; X64-AVX1-LABEL: test_mm_load_ss: 1250 ; X64-AVX1: # %bb.0: 1251 ; X64-AVX1-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07] 1252 ; X64-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1253 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1254 ; 1255 ; X64-AVX512-LABEL: test_mm_load_ss: 1256 ; X64-AVX512: # %bb.0: 1257 ; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] 1258 ; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1259 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1260 %ld = load float, float* %a0, align 1 1261 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1262 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 1263 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 1264 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 1265 ret <4 x float> %res3 1266 } 1267 1268 define <4 x float> @test_mm_load1_ps(float* %a0) nounwind { 1269 ; X86-SSE-LABEL: test_mm_load1_ps: 1270 ; X86-SSE: # %bb.0: 1271 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1272 ; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1273 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1274 ; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1275 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1276 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1277 ; 1278 ; X86-AVX1-LABEL: test_mm_load1_ps: 1279 ; X86-AVX1: # %bb.0: 1280 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1281 ; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1282 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1283 ; 1284 ; X86-AVX512-LABEL: test_mm_load1_ps: 1285 ; X86-AVX512: # %bb.0: 1286 
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1287 ; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1288 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 1289 ; 1290 ; X64-SSE-LABEL: test_mm_load1_ps: 1291 ; X64-SSE: # %bb.0: 1292 ; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1293 ; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1294 ; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1295 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1296 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1297 ; 1298 ; X64-AVX1-LABEL: test_mm_load1_ps: 1299 ; X64-AVX1: # %bb.0: 1300 ; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1301 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1302 ; 1303 ; X64-AVX512-LABEL: test_mm_load1_ps: 1304 ; X64-AVX512: # %bb.0: 1305 ; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1306 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1307 %ld = load float, float* %a0, align 4 1308 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1309 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1310 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1311 %res3 = insertelement <4 x float> %res2, float %ld, i32 3 1312 ret <4 x float> %res3 1313 } 1314 1315 define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { 1316 ; X86-SSE-LABEL: test_mm_loadh_pi: 1317 ; X86-SSE: # %bb.0: 1318 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1319 ; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] 1320 ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1321 ; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] 1322 ; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1323 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1324 ; X86-SSE-NEXT: # xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1325 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1326 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1327 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1328 ; 1329 ; X86-AVX1-LABEL: test_mm_loadh_pi: 1330 ; X86-AVX1: # %bb.0: 1331 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1332 ; X86-AVX1-NEXT: vmovsd (%eax), %xmm1 # encoding: [0xc5,0xfb,0x10,0x08] 1333 ; X86-AVX1-NEXT: # xmm1 = mem[0],zero 1334 ; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 1335 ; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 1336 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1337 ; 1338 ; X86-AVX512-LABEL: test_mm_loadh_pi: 1339 ; X86-AVX512: # %bb.0: 1340 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1341 ; X86-AVX512-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] 1342 ; X86-AVX512-NEXT: # xmm1 = mem[0],zero 1343 ; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 1344 ; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 1345 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 1346 ; 1347 ; X64-SSE-LABEL: test_mm_loadh_pi: 1348 ; X64-SSE: # %bb.0: 1349 ; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] 1350 ; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] 1351 ; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] 1352 ; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] 1353 ; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] 1354 ; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1355 ; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] 1356 ; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1357 ; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1358 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1359 ; X64-SSE-NEXT: 
movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1360 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1361 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1362 ; 1363 ; X64-AVX1-LABEL: test_mm_loadh_pi: 1364 ; X64-AVX1: # %bb.0: 1365 ; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07] 1366 ; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] 1367 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1368 ; 1369 ; X64-AVX512-LABEL: test_mm_loadh_pi: 1370 ; X64-AVX512: # %bb.0: 1371 ; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07] 1372 ; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] 1373 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1374 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1375 %ld = load <2 x float>, <2 x float>* %ptr 1376 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1377 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1378 ret <4 x float> %res 1379 } 1380 1381 define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { 1382 ; X86-SSE-LABEL: test_mm_loadl_pi: 1383 ; X86-SSE: # %bb.0: 1384 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1385 ; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] 1386 ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1387 ; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] 1388 ; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1389 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1390 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1391 ; X86-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] 1392 ; X86-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3] 1393 ; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 1394 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1395 ; 1396 ; X86-AVX1-LABEL: test_mm_loadl_pi: 1397 ; X86-AVX1: # %bb.0: 1398 ; X86-AVX1-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1399 ; X86-AVX1-NEXT: vmovsd (%eax), %xmm1 # encoding: [0xc5,0xfb,0x10,0x08] 1400 ; X86-AVX1-NEXT: # xmm1 = mem[0],zero 1401 ; X86-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] 1402 ; X86-AVX1-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3] 1403 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1404 ; 1405 ; X86-AVX512-LABEL: test_mm_loadl_pi: 1406 ; X86-AVX512: # %bb.0: 1407 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1408 ; X86-AVX512-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] 1409 ; X86-AVX512-NEXT: # xmm1 = mem[0],zero 1410 ; X86-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] 1411 ; X86-AVX512-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3] 1412 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 1413 ; 1414 ; X64-SSE-LABEL: test_mm_loadl_pi: 1415 ; X64-SSE: # %bb.0: 1416 ; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] 1417 ; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] 1418 ; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] 1419 ; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] 1420 ; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] 1421 ; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1422 ; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] 1423 ; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1424 ; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1425 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1426 ; X64-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] 1427 ; X64-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3] 1428 ; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 1429 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1430 ; 1431 ; X64-AVX1-LABEL: test_mm_loadl_pi: 1432 ; 
X64-AVX1: # %bb.0: 1433 ; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07] 1434 ; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] 1435 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1436 ; 1437 ; X64-AVX512-LABEL: test_mm_loadl_pi: 1438 ; X64-AVX512: # %bb.0: 1439 ; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07] 1440 ; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] 1441 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1442 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1443 %ld = load <2 x float>, <2 x float>* %ptr 1444 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1445 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1446 ret <4 x float> %res 1447 } 1448 1449 define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind { 1450 ; X86-SSE-LABEL: test_mm_loadr_ps: 1451 ; X86-SSE: # %bb.0: 1452 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1453 ; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1454 ; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1455 ; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1456 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1457 ; 1458 ; X86-AVX1-LABEL: test_mm_loadr_ps: 1459 ; X86-AVX1: # %bb.0: 1460 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1461 ; X86-AVX1-NEXT: vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1462 ; X86-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1463 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1464 ; 1465 ; X86-AVX512-LABEL: test_mm_loadr_ps: 1466 ; X86-AVX512: # %bb.0: 1467 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1468 ; X86-AVX512-NEXT: vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1469 ; X86-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1470 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 
1471 ; 1472 ; X64-SSE-LABEL: test_mm_loadr_ps: 1473 ; X64-SSE: # %bb.0: 1474 ; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1475 ; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1476 ; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1477 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1478 ; 1479 ; X64-AVX1-LABEL: test_mm_loadr_ps: 1480 ; X64-AVX1: # %bb.0: 1481 ; X64-AVX1-NEXT: vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1482 ; X64-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1483 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1484 ; 1485 ; X64-AVX512-LABEL: test_mm_loadr_ps: 1486 ; X64-AVX512: # %bb.0: 1487 ; X64-AVX512-NEXT: vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1488 ; X64-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1489 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1490 %arg0 = bitcast float* %a0 to <4 x float>* 1491 %ld = load <4 x float>, <4 x float>* %arg0, align 16 1492 %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1493 ret <4 x float> %res 1494 } 1495 1496 define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind { 1497 ; X86-SSE-LABEL: test_mm_loadu_ps: 1498 ; X86-SSE: # %bb.0: 1499 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1500 ; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] 1501 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1502 ; 1503 ; X86-AVX1-LABEL: test_mm_loadu_ps: 1504 ; X86-AVX1: # %bb.0: 1505 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1506 ; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] 1507 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 1508 ; 1509 ; X86-AVX512-LABEL: test_mm_loadu_ps: 1510 ; X86-AVX512: # %bb.0: 1511 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1512 ; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 1513 ; 
X86-AVX512-NEXT: retl # encoding: [0xc3] 1514 ; 1515 ; X64-SSE-LABEL: test_mm_loadu_ps: 1516 ; X64-SSE: # %bb.0: 1517 ; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] 1518 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1519 ; 1520 ; X64-AVX1-LABEL: test_mm_loadu_ps: 1521 ; X64-AVX1: # %bb.0: 1522 ; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] 1523 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 1524 ; 1525 ; X64-AVX512-LABEL: test_mm_loadu_ps: 1526 ; X64-AVX512: # %bb.0: 1527 ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 1528 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 1529 %arg0 = bitcast float* %a0 to <4 x float>* 1530 %res = load <4 x float>, <4 x float>* %arg0, align 1 1531 ret <4 x float> %res 1532 } 1533 1534 define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) { 1535 ; SSE-LABEL: test_mm_max_ps: 1536 ; SSE: # %bb.0: 1537 ; SSE-NEXT: maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1] 1538 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1539 ; 1540 ; AVX1-LABEL: test_mm_max_ps: 1541 ; AVX1: # %bb.0: 1542 ; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1] 1543 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1544 ; 1545 ; AVX512-LABEL: test_mm_max_ps: 1546 ; AVX512: # %bb.0: 1547 ; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] 1548 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1549 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1550 ret <4 x float> %res 1551 } 1552 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1553 1554 define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) { 1555 ; SSE-LABEL: test_mm_max_ss: 1556 ; SSE: # %bb.0: 1557 ; SSE-NEXT: maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1] 1558 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1559 ; 1560 ; AVX1-LABEL: test_mm_max_ss: 1561 ; AVX1: # %bb.0: 1562 ; 
AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5f,0xc1] 1563 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1564 ; 1565 ; AVX512-LABEL: test_mm_max_ss: 1566 ; AVX512: # %bb.0: 1567 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1] 1568 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1569 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1570 ret <4 x float> %res 1571 } 1572 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1573 1574 define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) { 1575 ; SSE-LABEL: test_mm_min_ps: 1576 ; SSE: # %bb.0: 1577 ; SSE-NEXT: minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1] 1578 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1579 ; 1580 ; AVX1-LABEL: test_mm_min_ps: 1581 ; AVX1: # %bb.0: 1582 ; AVX1-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1] 1583 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1584 ; 1585 ; AVX512-LABEL: test_mm_min_ps: 1586 ; AVX512: # %bb.0: 1587 ; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] 1588 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1589 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1590 ret <4 x float> %res 1591 } 1592 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1593 1594 define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) { 1595 ; SSE-LABEL: test_mm_min_ss: 1596 ; SSE: # %bb.0: 1597 ; SSE-NEXT: minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1] 1598 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1599 ; 1600 ; AVX1-LABEL: test_mm_min_ss: 1601 ; AVX1: # %bb.0: 1602 ; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1] 1603 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1604 ; 1605 ; AVX512-LABEL: test_mm_min_ss: 1606 ; AVX512: # %bb.0: 1607 ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1] 1608 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1609 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1610 ret <4 x float> %res 1611 } 1612 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1613 1614 define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) { 1615 ; SSE-LABEL: test_mm_move_ss: 1616 ; SSE: # %bb.0: 1617 ; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 1618 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1619 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1620 ; 1621 ; AVX-LABEL: test_mm_move_ss: 1622 ; AVX: # %bb.0: 1623 ; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 1624 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1625 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1626 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1627 ret <4 x float> %res 1628 } 1629 1630 define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) { 1631 ; SSE-LABEL: test_mm_movehl_ps: 1632 ; SSE: # %bb.0: 1633 ; SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1] 1634 ; SSE-NEXT: # xmm0 = xmm1[1],xmm0[1] 1635 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1636 ; 1637 ; AVX1-LABEL: test_mm_movehl_ps: 1638 ; AVX1: # %bb.0: 1639 ; AVX1-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x15,0xc0] 1640 ; AVX1-NEXT: # xmm0 = xmm1[1],xmm0[1] 1641 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1642 ; 1643 ; AVX512-LABEL: test_mm_movehl_ps: 1644 ; AVX512: # %bb.0: 1645 ; AVX512-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0] 1646 ; AVX512-NEXT: # xmm0 = xmm1[1],xmm0[1] 1647 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1648 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 1649 ret <4 x float> %res 1650 } 1651 1652 define <4 x float> @test_mm_movelh_ps(<4 x 
float> %a0, <4 x float> %a1) { 1653 ; SSE-LABEL: test_mm_movelh_ps: 1654 ; SSE: # %bb.0: 1655 ; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1656 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1657 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1658 ; 1659 ; AVX1-LABEL: test_mm_movelh_ps: 1660 ; AVX1: # %bb.0: 1661 ; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 1662 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 1663 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1664 ; 1665 ; AVX512-LABEL: test_mm_movelh_ps: 1666 ; AVX512: # %bb.0: 1667 ; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 1668 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 1669 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1670 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1671 ret <4 x float> %res 1672 } 1673 1674 define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind { 1675 ; SSE-LABEL: test_mm_movemask_ps: 1676 ; SSE: # %bb.0: 1677 ; SSE-NEXT: movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0] 1678 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1679 ; 1680 ; AVX-LABEL: test_mm_movemask_ps: 1681 ; AVX: # %bb.0: 1682 ; AVX-NEXT: vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0] 1683 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1684 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) 1685 ret i32 %res 1686 } 1687 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone 1688 1689 define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1690 ; SSE-LABEL: test_mm_mul_ps: 1691 ; SSE: # %bb.0: 1692 ; SSE-NEXT: mulps %xmm1, %xmm0 # encoding: [0x0f,0x59,0xc1] 1693 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1694 ; 1695 ; AVX1-LABEL: test_mm_mul_ps: 1696 ; AVX1: # %bb.0: 1697 ; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x59,0xc1] 1698 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1699 ; 1700 ; AVX512-LABEL: test_mm_mul_ps: 1701 ; AVX512: 
# %bb.0: 1702 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] 1703 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1704 %res = fmul <4 x float> %a0, %a1 1705 ret <4 x float> %res 1706 } 1707 1708 define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 1709 ; SSE-LABEL: test_mm_mul_ss: 1710 ; SSE: # %bb.0: 1711 ; SSE-NEXT: mulss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x59,0xc1] 1712 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1713 ; 1714 ; AVX1-LABEL: test_mm_mul_ss: 1715 ; AVX1: # %bb.0: 1716 ; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x59,0xc1] 1717 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1718 ; 1719 ; AVX512-LABEL: test_mm_mul_ss: 1720 ; AVX512: # %bb.0: 1721 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1] 1722 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1723 %ext0 = extractelement <4 x float> %a0, i32 0 1724 %ext1 = extractelement <4 x float> %a1, i32 0 1725 %fmul = fmul float %ext0, %ext1 1726 %res = insertelement <4 x float> %a0, float %fmul, i32 0 1727 ret <4 x float> %res 1728 } 1729 1730 define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1731 ; SSE-LABEL: test_mm_or_ps: 1732 ; SSE: # %bb.0: 1733 ; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 1734 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1735 ; 1736 ; AVX1-LABEL: test_mm_or_ps: 1737 ; AVX1: # %bb.0: 1738 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 1739 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1740 ; 1741 ; AVX512-LABEL: test_mm_or_ps: 1742 ; AVX512: # %bb.0: 1743 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 1744 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1745 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 1746 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 1747 %res = or <4 x i32> %arg0, %arg1 1748 %bc = bitcast <4 x i32> %res to 
<4 x float> 1749 ret <4 x float> %bc 1750 } 1751 1752 define void @test_mm_prefetch(i8* %a0) { 1753 ; X86-LABEL: test_mm_prefetch: 1754 ; X86: # %bb.0: 1755 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1756 ; X86-NEXT: prefetchnta (%eax) # encoding: [0x0f,0x18,0x00] 1757 ; X86-NEXT: retl # encoding: [0xc3] 1758 ; 1759 ; X64-LABEL: test_mm_prefetch: 1760 ; X64: # %bb.0: 1761 ; X64-NEXT: prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07] 1762 ; X64-NEXT: retq # encoding: [0xc3] 1763 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1) 1764 ret void 1765 } 1766 declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone 1767 1768 define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) { 1769 ; SSE-LABEL: test_mm_rcp_ps: 1770 ; SSE: # %bb.0: 1771 ; SSE-NEXT: rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0] 1772 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1773 ; 1774 ; AVX-LABEL: test_mm_rcp_ps: 1775 ; AVX: # %bb.0: 1776 ; AVX-NEXT: vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0] 1777 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1778 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1779 ret <4 x float> %res 1780 } 1781 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1782 1783 define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) { 1784 ; SSE-LABEL: test_mm_rcp_ss: 1785 ; SSE: # %bb.0: 1786 ; SSE-NEXT: rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0] 1787 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1788 ; 1789 ; AVX-LABEL: test_mm_rcp_ss: 1790 ; AVX: # %bb.0: 1791 ; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0] 1792 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1793 %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) 1794 ret <4 x float> %rcp 1795 } 1796 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1797 1798 define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) { 1799 ; SSE-LABEL: test_mm_rsqrt_ps: 1800 ; SSE: # %bb.0: 1801 ; 
SSE-NEXT: rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0] 1802 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1803 ; 1804 ; AVX-LABEL: test_mm_rsqrt_ps: 1805 ; AVX: # %bb.0: 1806 ; AVX-NEXT: vrsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x52,0xc0] 1807 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1808 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 1809 ret <4 x float> %res 1810 } 1811 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1812 1813 define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) { 1814 ; SSE-LABEL: test_mm_rsqrt_ss: 1815 ; SSE: # %bb.0: 1816 ; SSE-NEXT: rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0] 1817 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1818 ; 1819 ; AVX-LABEL: test_mm_rsqrt_ss: 1820 ; AVX: # %bb.0: 1821 ; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0] 1822 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1823 %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) 1824 ret <4 x float> %rsqrt 1825 } 1826 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 1827 1828 define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind { 1829 ; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1830 ; X86-SSE: # %bb.0: 1831 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1832 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1833 ; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1834 ; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1835 ; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1836 ; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1837 ; X86-SSE-NEXT: # imm = 0xE07F 1838 ; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1839 ; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1840 ; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1841 ; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1842 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1843 ; 1844 ; 
X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1845 ; X86-AVX: # %bb.0: 1846 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1847 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1848 ; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1849 ; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1850 ; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1851 ; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1852 ; X86-AVX-NEXT: # imm = 0xE07F 1853 ; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1854 ; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1855 ; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1856 ; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1857 ; X86-AVX-NEXT: retl # encoding: [0xc3] 1858 ; 1859 ; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1860 ; X64-SSE: # %bb.0: 1861 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1862 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1863 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1864 ; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1865 ; X64-SSE-NEXT: # imm = 0xE07F 1866 ; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1867 ; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1868 ; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1869 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1870 ; 1871 ; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1872 ; X64-AVX: # %bb.0: 1873 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1874 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1875 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1876 ; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1877 ; X64-AVX-NEXT: # imm = 0xE07F 1878 ; X64-AVX-NEXT: orl %edi, %ecx # 
encoding: [0x09,0xf9] 1879 ; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1880 ; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1881 ; X64-AVX-NEXT: retq # encoding: [0xc3] 1882 %1 = alloca i32, align 4 1883 %2 = bitcast i32* %1 to i8* 1884 call void @llvm.x86.sse.stmxcsr(i8* %2) 1885 %3 = load i32, i32* %1 1886 %4 = and i32 %3, -8065 1887 %5 = or i32 %4, %a0 1888 store i32 %5, i32* %1 1889 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1890 ret void 1891 } 1892 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone 1893 1894 define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind { 1895 ; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1896 ; X86-SSE: # %bb.0: 1897 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1898 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1899 ; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1900 ; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1901 ; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1902 ; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1903 ; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1904 ; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1905 ; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1906 ; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1907 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1908 ; 1909 ; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1910 ; X86-AVX: # %bb.0: 1911 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1912 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1913 ; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1914 ; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1915 ; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1916 ; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1917 ; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1918 ; X86-AVX-NEXT: movl %edx, (%esp) # encoding: 
[0x89,0x14,0x24] 1919 ; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1920 ; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1921 ; X86-AVX-NEXT: retl # encoding: [0xc3] 1922 ; 1923 ; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1924 ; X64-SSE: # %bb.0: 1925 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1926 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1927 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1928 ; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1929 ; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1930 ; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1931 ; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1932 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1933 ; 1934 ; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1935 ; X64-AVX: # %bb.0: 1936 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1937 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1938 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1939 ; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1940 ; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1941 ; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1942 ; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1943 ; X64-AVX-NEXT: retq # encoding: [0xc3] 1944 %1 = alloca i32, align 4 1945 %2 = bitcast i32* %1 to i8* 1946 call void @llvm.x86.sse.stmxcsr(i8* %2) 1947 %3 = load i32, i32* %1 1948 %4 = and i32 %3, -64 1949 %5 = or i32 %4, %a0 1950 store i32 %5, i32* %1 1951 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1952 ret void 1953 } 1954 1955 define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind { 1956 ; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1957 ; X86-SSE: # %bb.0: 1958 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1959 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), 
%eax # encoding: [0x8b,0x44,0x24,0x08] 1960 ; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1961 ; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1962 ; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1963 ; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1964 ; X86-SSE-NEXT: # imm = 0xFFFF7FFF 1965 ; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1966 ; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1967 ; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1968 ; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1969 ; X86-SSE-NEXT: retl # encoding: [0xc3] 1970 ; 1971 ; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1972 ; X86-AVX: # %bb.0: 1973 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1974 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1975 ; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1976 ; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1977 ; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1978 ; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1979 ; X86-AVX-NEXT: # imm = 0xFFFF7FFF 1980 ; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1981 ; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1982 ; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1983 ; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1984 ; X86-AVX-NEXT: retl # encoding: [0xc3] 1985 ; 1986 ; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1987 ; X64-SSE: # %bb.0: 1988 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1989 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1990 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1991 ; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 1992 ; X64-SSE-NEXT: # imm = 0xFFFF7FFF 1993 ; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1994 ; 
X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1995 ; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1996 ; X64-SSE-NEXT: retq # encoding: [0xc3] 1997 ; 1998 ; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1999 ; X64-AVX: # %bb.0: 2000 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2001 ; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 2002 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2003 ; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 2004 ; X64-AVX-NEXT: # imm = 0xFFFF7FFF 2005 ; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2006 ; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2007 ; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2008 ; X64-AVX-NEXT: retq # encoding: [0xc3] 2009 %1 = alloca i32, align 4 2010 %2 = bitcast i32* %1 to i8* 2011 call void @llvm.x86.sse.stmxcsr(i8* %2) 2012 %3 = load i32, i32* %1 2013 %4 = and i32 %3, -32769 2014 %5 = or i32 %4, %a0 2015 store i32 %5, i32* %1 2016 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2017 ret void 2018 } 2019 2020 define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 2021 ; X86-SSE-LABEL: test_mm_set_ps: 2022 ; X86-SSE: # %bb.0: 2023 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 2024 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2025 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 2026 ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2027 ; X86-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 2028 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2029 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] 2030 ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2031 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: 
[0xf3,0x0f,0x10,0x54,0x24,0x04] 2032 ; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 2033 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 2034 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2035 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 2036 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2037 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2038 ; 2039 ; X86-AVX1-LABEL: test_mm_set_ps: 2040 ; X86-AVX1: # %bb.0: 2041 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2042 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2043 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2044 ; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2045 ; X86-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2046 ; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2047 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08] 2048 ; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2049 ; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20] 2050 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2051 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] 2052 ; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2053 ; X86-AVX1-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30] 2054 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0] 2055 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2056 ; 2057 ; X86-AVX512-LABEL: test_mm_set_ps: 2058 ; X86-AVX512: # %bb.0: 2059 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2060 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2061 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2062 ; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 
2063 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2064 ; X86-AVX512-NEXT: # xmm2 = mem[0],zero,zero,zero 2065 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2066 ; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero 2067 ; X86-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2068 ; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2069 ; X86-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2070 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2071 ; X86-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2072 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2073 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2074 ; 2075 ; X64-SSE-LABEL: test_mm_set_ps: 2076 ; X64-SSE: # %bb.0: 2077 ; X64-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2078 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2079 ; X64-SSE-NEXT: unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda] 2080 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 2081 ; X64-SSE-NEXT: movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9] 2082 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm1[0] 2083 ; X64-SSE-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] 2084 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2085 ; 2086 ; X64-AVX1-LABEL: test_mm_set_ps: 2087 ; X64-AVX1: # %bb.0: 2088 ; X64-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2089 ; X64-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2090 ; X64-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2091 ; X64-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2092 ; X64-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: 
[0xc4,0xe3,0x71,0x21,0xc0,0x30] 2093 ; X64-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2094 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2095 ; 2096 ; X64-AVX512-LABEL: test_mm_set_ps: 2097 ; X64-AVX512: # %bb.0: 2098 ; X64-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2099 ; X64-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2100 ; X64-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2101 ; X64-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2102 ; X64-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2103 ; X64-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2104 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2105 %res0 = insertelement <4 x float> undef, float %a3, i32 0 2106 %res1 = insertelement <4 x float> %res0, float %a2, i32 1 2107 %res2 = insertelement <4 x float> %res1, float %a1, i32 2 2108 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2109 ret <4 x float> %res3 2110 } 2111 2112 define <4 x float> @test_mm_set_ps1(float %a0) nounwind { 2113 ; X86-SSE-LABEL: test_mm_set_ps1: 2114 ; X86-SSE: # %bb.0: 2115 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2116 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2117 ; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2118 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2119 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2120 ; 2121 ; X86-AVX1-LABEL: test_mm_set_ps1: 2122 ; X86-AVX1: # %bb.0: 2123 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2124 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2125 ; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2126 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2127 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2128 ; 2129 ; X86-AVX512-LABEL: test_mm_set_ps1: 2130 
; X86-AVX512: # %bb.0: 2131 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2132 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2133 ; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2134 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2135 ; 2136 ; X64-SSE-LABEL: test_mm_set_ps1: 2137 ; X64-SSE: # %bb.0: 2138 ; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2139 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2140 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2141 ; 2142 ; X64-AVX1-LABEL: test_mm_set_ps1: 2143 ; X64-AVX1: # %bb.0: 2144 ; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2145 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2146 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2147 ; 2148 ; X64-AVX512-LABEL: test_mm_set_ps1: 2149 ; X64-AVX512: # %bb.0: 2150 ; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2151 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2152 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2153 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2154 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2155 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2156 ret <4 x float> %res3 2157 } 2158 2159 define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind { 2160 ; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2161 ; X86-SSE: # %bb.0: 2162 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2163 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2164 ; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2165 ; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 2166 ; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2167 ; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2168 ; X86-SSE-NEXT: # imm = 0x9FFF 2169 ; X86-SSE-NEXT: orl 
%eax, %edx # encoding: [0x09,0xc2] 2170 ; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2171 ; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 2172 ; X86-SSE-NEXT: popl %eax # encoding: [0x58] 2173 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2174 ; 2175 ; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2176 ; X86-AVX: # %bb.0: 2177 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 2178 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2179 ; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2180 ; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 2181 ; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2182 ; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2183 ; X86-AVX-NEXT: # imm = 0x9FFF 2184 ; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 2185 ; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2186 ; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 2187 ; X86-AVX-NEXT: popl %eax # encoding: [0x58] 2188 ; X86-AVX-NEXT: retl # encoding: [0xc3] 2189 ; 2190 ; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2191 ; X64-SSE: # %bb.0: 2192 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2193 ; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 2194 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2195 ; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2196 ; X64-SSE-NEXT: # imm = 0x9FFF 2197 ; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2198 ; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2199 ; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2200 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2201 ; 2202 ; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2203 ; X64-AVX: # %bb.0: 2204 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2205 ; X64-AVX-NEXT: vstmxcsr (%rax) # 
encoding: [0xc5,0xf8,0xae,0x18] 2206 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2207 ; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2208 ; X64-AVX-NEXT: # imm = 0x9FFF 2209 ; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2210 ; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2211 ; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2212 ; X64-AVX-NEXT: retq # encoding: [0xc3] 2213 %1 = alloca i32, align 4 2214 %2 = bitcast i32* %1 to i8* 2215 call void @llvm.x86.sse.stmxcsr(i8* %2) 2216 %3 = load i32, i32* %1 2217 %4 = and i32 %3, -24577 2218 %5 = or i32 %4, %a0 2219 store i32 %5, i32* %1 2220 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2221 ret void 2222 } 2223 2224 define <4 x float> @test_mm_set_ss(float %a0) nounwind { 2225 ; X86-SSE-LABEL: test_mm_set_ss: 2226 ; X86-SSE: # %bb.0: 2227 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] 2228 ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2229 ; X86-SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2230 ; X86-SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 2231 ; X86-SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 2232 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2233 ; 2234 ; X86-AVX1-LABEL: test_mm_set_ss: 2235 ; X86-AVX1: # %bb.0: 2236 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2237 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2238 ; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2239 ; X86-AVX1-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2240 ; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2241 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2242 ; 2243 ; X86-AVX512-LABEL: test_mm_set_ss: 2244 ; X86-AVX512: # %bb.0: 2245 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2246 
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2247 ; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2248 ; X86-AVX512-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2249 ; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2250 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2251 ; 2252 ; X64-SSE-LABEL: test_mm_set_ss: 2253 ; X64-SSE: # %bb.0: 2254 ; X64-SSE-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] 2255 ; X64-SSE-NEXT: movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8] 2256 ; X64-SSE-NEXT: # xmm1 = xmm0[0],xmm1[1,2,3] 2257 ; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 2258 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2259 ; 2260 ; X64-AVX-LABEL: test_mm_set_ss: 2261 ; X64-AVX: # %bb.0: 2262 ; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2263 ; X64-AVX-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2264 ; X64-AVX-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2265 ; X64-AVX-NEXT: retq # encoding: [0xc3] 2266 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2267 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 2268 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 2269 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 2270 ret <4 x float> %res3 2271 } 2272 2273 define <4 x float> @test_mm_set1_ps(float %a0) nounwind { 2274 ; X86-SSE-LABEL: test_mm_set1_ps: 2275 ; X86-SSE: # %bb.0: 2276 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2277 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2278 ; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2279 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2280 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2281 ; 2282 ; X86-AVX1-LABEL: test_mm_set1_ps: 2283 ; X86-AVX1: # %bb.0: 2284 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2285 ; X86-AVX1-NEXT: # xmm0 = 
mem[0],zero,zero,zero 2286 ; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2287 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2288 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2289 ; 2290 ; X86-AVX512-LABEL: test_mm_set1_ps: 2291 ; X86-AVX512: # %bb.0: 2292 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2293 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2294 ; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2295 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2296 ; 2297 ; X64-SSE-LABEL: test_mm_set1_ps: 2298 ; X64-SSE: # %bb.0: 2299 ; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2300 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2301 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2302 ; 2303 ; X64-AVX1-LABEL: test_mm_set1_ps: 2304 ; X64-AVX1: # %bb.0: 2305 ; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2306 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2307 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2308 ; 2309 ; X64-AVX512-LABEL: test_mm_set1_ps: 2310 ; X64-AVX512: # %bb.0: 2311 ; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2312 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2313 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2314 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2315 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2316 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2317 ret <4 x float> %res3 2318 } 2319 2320 define void @test_mm_setcsr(i32 %a0) nounwind { 2321 ; X86-SSE-LABEL: test_mm_setcsr: 2322 ; X86-SSE: # %bb.0: 2323 ; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2324 ; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10] 2325 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2326 ; 2327 ; X86-AVX-LABEL: 
test_mm_setcsr: 2328 ; X86-AVX: # %bb.0: 2329 ; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2330 ; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10] 2331 ; X86-AVX-NEXT: retl # encoding: [0xc3] 2332 ; 2333 ; X64-SSE-LABEL: test_mm_setcsr: 2334 ; X64-SSE: # %bb.0: 2335 ; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2336 ; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2337 ; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2338 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2339 ; 2340 ; X64-AVX-LABEL: test_mm_setcsr: 2341 ; X64-AVX: # %bb.0: 2342 ; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2343 ; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2344 ; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2345 ; X64-AVX-NEXT: retq # encoding: [0xc3] 2346 %st = alloca i32, align 4 2347 store i32 %a0, i32* %st, align 4 2348 %bc = bitcast i32* %st to i8* 2349 call void @llvm.x86.sse.ldmxcsr(i8* %bc) 2350 ret void 2351 } 2352 2353 define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 2354 ; X86-SSE-LABEL: test_mm_setr_ps: 2355 ; X86-SSE: # %bb.0: 2356 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 2357 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2358 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 2359 ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2360 ; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2361 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2362 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] 2363 ; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 2364 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2365 ; X86-SSE-NEXT: # xmm0 = 
mem[0],zero,zero,zero 2366 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 2367 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2368 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 2369 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2370 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2371 ; 2372 ; X86-AVX1-LABEL: test_mm_setr_ps: 2373 ; X86-AVX1: # %bb.0: 2374 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2375 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2376 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2377 ; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2378 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2379 ; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero 2380 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2381 ; X86-AVX1-NEXT: # xmm3 = mem[0],zero,zero,zero 2382 ; X86-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2383 ; X86-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2384 ; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2385 ; X86-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2386 ; X86-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2387 ; X86-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2388 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2389 ; 2390 ; X86-AVX512-LABEL: test_mm_setr_ps: 2391 ; X86-AVX512: # %bb.0: 2392 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2393 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2394 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2395 ; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2396 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX 
TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2397 ; X86-AVX512-NEXT: # xmm2 = mem[0],zero,zero,zero 2398 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2399 ; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero 2400 ; X86-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2401 ; X86-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2402 ; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2403 ; X86-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2404 ; X86-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2405 ; X86-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2406 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2407 ; 2408 ; X64-SSE-LABEL: test_mm_setr_ps: 2409 ; X64-SSE: # %bb.0: 2410 ; X64-SSE-NEXT: unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3] 2411 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2412 ; X64-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 2413 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2414 ; X64-SSE-NEXT: movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2] 2415 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 2416 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2417 ; 2418 ; X64-AVX1-LABEL: test_mm_setr_ps: 2419 ; X64-AVX1: # %bb.0: 2420 ; X64-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2421 ; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2422 ; X64-AVX1-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2423 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2424 ; X64-AVX1-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2425 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2426 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2427 ; 2428 ; 
X64-AVX512-LABEL: test_mm_setr_ps: 2429 ; X64-AVX512: # %bb.0: 2430 ; X64-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2431 ; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2432 ; X64-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2433 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2434 ; X64-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2435 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2436 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2437 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2438 %res1 = insertelement <4 x float> %res0, float %a1, i32 1 2439 %res2 = insertelement <4 x float> %res1, float %a2, i32 2 2440 %res3 = insertelement <4 x float> %res2, float %a3, i32 3 2441 ret <4 x float> %res3 2442 } 2443 ; IR form of _mm_setzero_ps (intrinsic inferred from the test name) - returns the constant zeroinitializer vector; every run line expects a single register-clearing xorps/vxorps on xmm0 followed by the return. 2444 define <4 x float> @test_mm_setzero_ps() { 2445 ; SSE-LABEL: test_mm_setzero_ps: 2446 ; SSE: # %bb.0: 2447 ; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2448 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2449 ; 2450 ; AVX1-LABEL: test_mm_setzero_ps: 2451 ; AVX1: # %bb.0: 2452 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 2453 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2454 ; 2455 ; AVX512-LABEL: test_mm_setzero_ps: 2456 ; AVX512: # %bb.0: 2457 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 2458 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2459 ret <4 x float> zeroinitializer 2460 } 2461 ; Calls the llvm.x86.sse.sfence intrinsic and returns; all run lines share one set of expectations, a single sfence instruction before the return. NOTE(review) - the declaration that follows is tagged readnone even though a store fence exists only for its ordering side effect; presumably harmless because intrinsic attributes are re-derived from the intrinsic definition, but confirm before copying this pattern. 2462 define void @test_mm_sfence() nounwind { 2463 ; CHECK-LABEL: test_mm_sfence: 2464 ; CHECK: # %bb.0: 2465 ; CHECK-NEXT: sfence # encoding: [0x0f,0xae,0xf8] 2466 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2467 call void @llvm.x86.sse.sfence() 2468 ret void 2469 } 2470 declare void @llvm.x86.sse.sfence() nounwind readnone 2471 ; IR form of _mm_shuffle_ps with an all-zero selector (inferred from the shufps $0 expectations) - the shufflevector mask <0, 0, 4, 4> takes lane 0 of %a0 twice, then lane 0 of %a1 twice. 2472 define <4
x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2473 ; SSE-LABEL: test_mm_shuffle_ps: 2474 ; SSE: # %bb.0: 2475 ; SSE-NEXT: shufps $0, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00] 2476 ; SSE-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2477 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2478 ; 2479 ; AVX1-LABEL: test_mm_shuffle_ps: 2480 ; AVX1: # %bb.0: 2481 ; AVX1-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc1,0x00] 2482 ; AVX1-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2483 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2484 ; 2485 ; AVX512-LABEL: test_mm_shuffle_ps: 2486 ; AVX512: # %bb.0: 2487 ; AVX512-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00] 2488 ; AVX512-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2489 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2490 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4> 2491 ret <4 x float> %res 2492 } 2493 ; IR form of _mm_sqrt_ps (intrinsic inferred from the test name) - a single call to the generic llvm.sqrt.v4f32 intrinsic on %a0; every run line expects one packed sqrtps/vsqrtps on xmm0 followed by the return. 2494 define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) { 2495 ; SSE-LABEL: test_mm_sqrt_ps: 2496 ; SSE: # %bb.0: 2497 ; SSE-NEXT: sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0] 2498 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2499 ; 2500 ; AVX1-LABEL: test_mm_sqrt_ps: 2501 ; AVX1: # %bb.0: 2502 ; AVX1-NEXT: vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0] 2503 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2504 ; 2505 ; AVX512-LABEL: test_mm_sqrt_ps: 2506 ; AVX512: # %bb.0: 2507 ; AVX512-NEXT: vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] 2508 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2509 %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0) 2510 ret <4 x float> %res 2511 } 2512 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone 2513 ; IR form of _mm_sqrt_ss (intrinsic inferred from the test name) - extracts lane 0 of %a0, applies llvm.sqrt.f32 to it and inserts the result back into lane 0 of %a0, so lanes 1-3 pass through unchanged; expected to select a single scalar sqrtss/vsqrtss. 2514 define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) { 2515 ; SSE-LABEL: test_mm_sqrt_ss: 2516 ; SSE: # %bb.0: 2517 ; SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2518 ; SSE-NEXT:
ret{{[l|q]}} # encoding: [0xc3] 2519 ; 2520 ; AVX1-LABEL: test_mm_sqrt_ss: 2521 ; AVX1: # %bb.0: 2522 ; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2523 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2524 ; 2525 ; AVX512-LABEL: test_mm_sqrt_ss: 2526 ; AVX512: # %bb.0: 2527 ; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2528 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2529 %ext = extractelement <4 x float> %a0, i32 0 2530 %sqrt = call float @llvm.sqrt.f32(float %ext) 2531 %ins = insertelement <4 x float> %a0, float %sqrt, i32 0 2532 ret <4 x float> %ins 2533 } 2534 declare float @llvm.sqrt.f32(float) nounwind readnone 2535 2536 define float @test_mm_sqrt_ss_scalar(float %a0) { 2537 ; X86-SSE-LABEL: test_mm_sqrt_ss_scalar: 2538 ; X86-SSE: # %bb.0: 2539 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2540 ; X86-SSE-NEXT: .cfi_def_cfa_offset 8 2541 ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] 2542 ; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2543 ; X86-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2544 ; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24] 2545 ; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2546 ; X86-SSE-NEXT: popl %eax # encoding: [0x58] 2547 ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 2548 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2549 ; 2550 ; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2551 ; X86-AVX1: # %bb.0: 2552 ; X86-AVX1-NEXT: pushl %eax # encoding: [0x50] 2553 ; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 2554 ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2555 ; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2556 ; X86-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2557 ; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24] 2558 ; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2559 ; 
X86-AVX1-NEXT: popl %eax # encoding: [0x58] 2560 ; X86-AVX1-NEXT: .cfi_def_cfa_offset 4 2561 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2562 ; 2563 ; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2564 ; X86-AVX512: # %bb.0: 2565 ; X86-AVX512-NEXT: pushl %eax # encoding: [0x50] 2566 ; X86-AVX512-NEXT: .cfi_def_cfa_offset 8 2567 ; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2568 ; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2569 ; X86-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2570 ; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24] 2571 ; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2572 ; X86-AVX512-NEXT: popl %eax # encoding: [0x58] 2573 ; X86-AVX512-NEXT: .cfi_def_cfa_offset 4 2574 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2575 ; 2576 ; X64-SSE-LABEL: test_mm_sqrt_ss_scalar: 2577 ; X64-SSE: # %bb.0: 2578 ; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2579 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2580 ; 2581 ; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2582 ; X64-AVX1: # %bb.0: 2583 ; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2584 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2585 ; 2586 ; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2587 ; X64-AVX512: # %bb.0: 2588 ; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2589 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2590 %sqrt = call float @llvm.sqrt.f32(float %a0) 2591 ret float %sqrt 2592 } 2593 2594 define void @test_mm_store_ps(float *%a0, <4 x float> %a1) { 2595 ; X86-SSE-LABEL: test_mm_store_ps: 2596 ; X86-SSE: # %bb.0: 2597 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2598 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2599 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2600 ; 
2601 ; X86-AVX1-LABEL: test_mm_store_ps: 2602 ; X86-AVX1: # %bb.0: 2603 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2604 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2605 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2606 ; 2607 ; X86-AVX512-LABEL: test_mm_store_ps: 2608 ; X86-AVX512: # %bb.0: 2609 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2610 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2611 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2612 ; 2613 ; X64-SSE-LABEL: test_mm_store_ps: 2614 ; X64-SSE: # %bb.0: 2615 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2616 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2617 ; 2618 ; X64-AVX1-LABEL: test_mm_store_ps: 2619 ; X64-AVX1: # %bb.0: 2620 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2621 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2622 ; 2623 ; X64-AVX512-LABEL: test_mm_store_ps: 2624 ; X64-AVX512: # %bb.0: 2625 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2626 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2627 %arg0 = bitcast float* %a0 to <4 x float>* 2628 store <4 x float> %a1, <4 x float>* %arg0, align 16 2629 ret void 2630 } 2631 2632 define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) { 2633 ; X86-SSE-LABEL: test_mm_store_ps1: 2634 ; X86-SSE: # %bb.0: 2635 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2636 ; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2637 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2638 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2639 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2640 ; 2641 ; X86-AVX1-LABEL: test_mm_store_ps1: 2642 ; X86-AVX1: # %bb.0: 2643 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2644 ; X86-AVX1-NEXT: vpermilps $0, %xmm0, 
%xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2645 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2646 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2647 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2648 ; 2649 ; X86-AVX512-LABEL: test_mm_store_ps1: 2650 ; X86-AVX512: # %bb.0: 2651 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2652 ; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2653 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2654 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2655 ; 2656 ; X64-SSE-LABEL: test_mm_store_ps1: 2657 ; X64-SSE: # %bb.0: 2658 ; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2659 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2660 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2661 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2662 ; 2663 ; X64-AVX1-LABEL: test_mm_store_ps1: 2664 ; X64-AVX1: # %bb.0: 2665 ; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2666 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2667 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2668 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2669 ; 2670 ; X64-AVX512-LABEL: test_mm_store_ps1: 2671 ; X64-AVX512: # %bb.0: 2672 ; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2673 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2674 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2675 %arg0 = bitcast float* %a0 to <4 x float>* 2676 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2677 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2678 ret void 2679 } 2680 2681 define void @test_mm_store_ss(float *%a0, <4 x float> %a1) { 2682 ; X86-SSE-LABEL: test_mm_store_ss: 2683 ; X86-SSE: # %bb.0: 2684 
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2685 ; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00] 2686 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2687 ; 2688 ; X86-AVX1-LABEL: test_mm_store_ss: 2689 ; X86-AVX1: # %bb.0: 2690 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2691 ; X86-AVX1-NEXT: vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00] 2692 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2693 ; 2694 ; X86-AVX512-LABEL: test_mm_store_ss: 2695 ; X86-AVX512: # %bb.0: 2696 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2697 ; X86-AVX512-NEXT: vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00] 2698 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2699 ; 2700 ; X64-SSE-LABEL: test_mm_store_ss: 2701 ; X64-SSE: # %bb.0: 2702 ; X64-SSE-NEXT: movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07] 2703 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2704 ; 2705 ; X64-AVX1-LABEL: test_mm_store_ss: 2706 ; X64-AVX1: # %bb.0: 2707 ; X64-AVX1-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07] 2708 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2709 ; 2710 ; X64-AVX512-LABEL: test_mm_store_ss: 2711 ; X64-AVX512: # %bb.0: 2712 ; X64-AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07] 2713 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2714 %ext = extractelement <4 x float> %a1, i32 0 2715 store float %ext, float* %a0, align 1 2716 ret void 2717 } 2718 2719 define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) { 2720 ; X86-SSE-LABEL: test_mm_store1_ps: 2721 ; X86-SSE: # %bb.0: 2722 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2723 ; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2724 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2725 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2726 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2727 ; 2728 ; 
X86-AVX1-LABEL: test_mm_store1_ps: 2729 ; X86-AVX1: # %bb.0: 2730 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2731 ; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2732 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2733 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2734 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2735 ; 2736 ; X86-AVX512-LABEL: test_mm_store1_ps: 2737 ; X86-AVX512: # %bb.0: 2738 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2739 ; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2740 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2741 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2742 ; 2743 ; X64-SSE-LABEL: test_mm_store1_ps: 2744 ; X64-SSE: # %bb.0: 2745 ; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2746 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2747 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2748 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2749 ; 2750 ; X64-AVX1-LABEL: test_mm_store1_ps: 2751 ; X64-AVX1: # %bb.0: 2752 ; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2753 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2754 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2755 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2756 ; 2757 ; X64-AVX512-LABEL: test_mm_store1_ps: 2758 ; X64-AVX512: # %bb.0: 2759 ; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2760 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2761 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2762 %arg0 = bitcast float* %a0 to <4 x float>* 2763 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2764 store <4 x float> %shuf, <4 
x float>* %arg0, align 16 2765 ret void 2766 } 2767 2768 define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { 2769 ; X86-SSE-LABEL: test_mm_storeh_ps: 2770 ; X86-SSE: # %bb.0: 2771 ; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2772 ; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2773 ; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2774 ; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2775 ; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2776 ; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2777 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2778 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2779 ; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2780 ; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2781 ; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2782 ; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2783 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2784 ; 2785 ; X86-AVX1-LABEL: test_mm_storeh_ps: 2786 ; X86-AVX1: # %bb.0: 2787 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2788 ; X86-AVX1-NEXT: vmovhpd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x17,0x00] 2789 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2790 ; 2791 ; X86-AVX512-LABEL: test_mm_storeh_ps: 2792 ; X86-AVX512: # %bb.0: 2793 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2794 ; X86-AVX512-NEXT: vmovhpd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x17,0x00] 2795 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2796 ; 2797 ; X64-SSE-LABEL: test_mm_storeh_ps: 2798 ; X64-SSE: # %bb.0: 2799 ; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2800 ; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] 2801 ; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2802 ; X64-SSE-NEXT: retq # encoding: 
[0xc3] 2803 ; 2804 ; X64-AVX1-LABEL: test_mm_storeh_ps: 2805 ; X64-AVX1: # %bb.0: 2806 ; X64-AVX1-NEXT: vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2807 ; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2808 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2809 ; 2810 ; X64-AVX512-LABEL: test_mm_storeh_ps: 2811 ; X64-AVX512: # %bb.0: 2812 ; X64-AVX512-NEXT: vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2813 ; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2814 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2815 %ptr = bitcast x86_mmx* %a0 to i64* 2816 %bc = bitcast <4 x float> %a1 to <2 x i64> 2817 %ext = extractelement <2 x i64> %bc, i32 1 2818 store i64 %ext, i64* %ptr 2819 ret void 2820 } 2821 2822 define void @test_mm_storel_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { 2823 ; X86-SSE-LABEL: test_mm_storel_ps: 2824 ; X86-SSE: # %bb.0: 2825 ; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2826 ; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2827 ; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2828 ; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2829 ; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2830 ; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2831 ; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] 2832 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] 2833 ; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2834 ; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2835 ; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2836 ; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2837 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2838 ; 2839 ; X86-AVX1-LABEL: test_mm_storel_ps: 2840 ; X86-AVX1: # %bb.0: 2841 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2842 ; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: 
[0xc5,0xf8,0x13,0x00] 2843 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2844 ; 2845 ; X86-AVX512-LABEL: test_mm_storel_ps: 2846 ; X86-AVX512: # %bb.0: 2847 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2848 ; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 2849 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2850 ; 2851 ; X64-SSE-LABEL: test_mm_storel_ps: 2852 ; X64-SSE: # %bb.0: 2853 ; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2854 ; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] 2855 ; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2856 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2857 ; 2858 ; X64-AVX1-LABEL: test_mm_storel_ps: 2859 ; X64-AVX1: # %bb.0: 2860 ; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2861 ; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2862 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2863 ; 2864 ; X64-AVX512-LABEL: test_mm_storel_ps: 2865 ; X64-AVX512: # %bb.0: 2866 ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2867 ; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2868 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2869 %ptr = bitcast x86_mmx* %a0 to i64* 2870 %bc = bitcast <4 x float> %a1 to <2 x i64> 2871 %ext = extractelement <2 x i64> %bc, i32 0 2872 store i64 %ext, i64* %ptr 2873 ret void 2874 } 2875 ; Reverses the four lanes of %a1 (shuffle mask 3,2,1,0) and stores the result through %a0 with 16-byte alignment. 2876 define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) { 2877 ; X86-SSE-LABEL: test_mm_storer_ps: 2878 ; X86-SSE: # %bb.0: 2879 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2880 ; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2881 ; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2882 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2883 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2884 ; 2885 ; 
X86-AVX1-LABEL: test_mm_storer_ps: 2886 ; X86-AVX1: # %bb.0: 2887 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2888 ; X86-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2889 ; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2890 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2891 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2892 ; 2893 ; X86-AVX512-LABEL: test_mm_storer_ps: 2894 ; X86-AVX512: # %bb.0: 2895 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2896 ; X86-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2897 ; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2898 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2899 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2900 ; 2901 ; X64-SSE-LABEL: test_mm_storer_ps: 2902 ; X64-SSE: # %bb.0: 2903 ; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2904 ; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2905 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2906 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2907 ; 2908 ; X64-AVX1-LABEL: test_mm_storer_ps: 2909 ; X64-AVX1: # %bb.0: 2910 ; X64-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2911 ; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2912 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2913 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2914 ; 2915 ; X64-AVX512-LABEL: test_mm_storer_ps: 2916 ; X64-AVX512: # %bb.0: 2917 ; X64-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2918 ; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2919 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2920 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2921 %arg0 = bitcast float* %a0 to <4 x float>* 2922 %shuf 
= shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 2923 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2924 ret void 2925 } 2926
; Stores %a1 through %a0 cast to a <4 x float> pointer using align 1, i.e. an unaligned vector store.
2927 define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) { 2928 ; X86-SSE-LABEL: test_mm_storeu_ps: 2929 ; X86-SSE: # %bb.0: 2930 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2931 ; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 2932 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2933 ; 2934 ; X86-AVX1-LABEL: test_mm_storeu_ps: 2935 ; X86-AVX1: # %bb.0: 2936 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2937 ; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 2938 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2939 ; 2940 ; X86-AVX512-LABEL: test_mm_storeu_ps: 2941 ; X86-AVX512: # %bb.0: 2942 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2943 ; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 2944 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2945 ; 2946 ; X64-SSE-LABEL: test_mm_storeu_ps: 2947 ; X64-SSE: # %bb.0: 2948 ; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 2949 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2950 ; 2951 ; X64-AVX1-LABEL: test_mm_storeu_ps: 2952 ; X64-AVX1: # %bb.0: 2953 ; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 2954 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2955 ; 2956 ; X64-AVX512-LABEL: test_mm_storeu_ps: 2957 ; X64-AVX512: # %bb.0: 2958 ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 2959 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2960 %arg0 = bitcast float* %a0 to <4 x float>* 2961 store <4 x float> %a1, <4 x float>* %arg0, align 1 2962 ret void 2963 } 2964 ; Stores %a1 through %a0 with 16-byte alignment, tagged with !nontemporal metadata so a non-temporal store is selected. 2965 define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) { 2966 ; X86-SSE-LABEL: test_mm_stream_ps: 2967 ; X86-SSE: # %bb.0: 2968 ; 
X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2969 ; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 2970 ; X86-SSE-NEXT: retl # encoding: [0xc3] 2971 ; 2972 ; X86-AVX1-LABEL: test_mm_stream_ps: 2973 ; X86-AVX1: # %bb.0: 2974 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2975 ; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 2976 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 2977 ; 2978 ; X86-AVX512-LABEL: test_mm_stream_ps: 2979 ; X86-AVX512: # %bb.0: 2980 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2981 ; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 2982 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 2983 ; 2984 ; X64-SSE-LABEL: test_mm_stream_ps: 2985 ; X64-SSE: # %bb.0: 2986 ; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 2987 ; X64-SSE-NEXT: retq # encoding: [0xc3] 2988 ; 2989 ; X64-AVX1-LABEL: test_mm_stream_ps: 2990 ; X64-AVX1: # %bb.0: 2991 ; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 2992 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 2993 ; 2994 ; X64-AVX512-LABEL: test_mm_stream_ps: 2995 ; X64-AVX512: # %bb.0: 2996 ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 2997 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 2998 %arg0 = bitcast float* %a0 to <4 x float>* 2999 store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0 3000 ret void 3001 } 3002 ; Element-wise subtraction of two <4 x float> vectors (%a0 - %a1) via a single fsub. 3003 define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3004 ; SSE-LABEL: test_mm_sub_ps: 3005 ; SSE: # %bb.0: 3006 ; SSE-NEXT: subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1] 3007 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3008 ; 3009 ; AVX1-LABEL: test_mm_sub_ps: 3010 ; AVX1: # %bb.0: 3011 ; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1] 3012 ; AVX1-NEXT: ret{{[l|q]}} # 
encoding: [0xc3] 3013 ; 3014 ; AVX512-LABEL: test_mm_sub_ps: 3015 ; AVX512: # %bb.0: 3016 ; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] 3017 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3018 %res = fsub <4 x float> %a0, %a1 3019 ret <4 x float> %res 3020 } 3021 ; Subtracts lane 0 of %a1 from lane 0 of %a0 and inserts the difference back into lane 0 of %a0; lanes 1-3 of %a0 pass through. 3022 define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3023 ; SSE-LABEL: test_mm_sub_ss: 3024 ; SSE: # %bb.0: 3025 ; SSE-NEXT: subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1] 3026 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3027 ; 3028 ; AVX1-LABEL: test_mm_sub_ss: 3029 ; AVX1: # %bb.0: 3030 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1] 3031 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3032 ; 3033 ; AVX512-LABEL: test_mm_sub_ss: 3034 ; AVX512: # %bb.0: 3035 ; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1] 3036 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3037 %ext0 = extractelement <4 x float> %a0, i32 0 3038 %ext1 = extractelement <4 x float> %a1, i32 0 3039 %fsub = fsub float %ext0, %ext1 3040 %res = insertelement <4 x float> %a0, float %fsub, i32 0 3041 ret <4 x float> %res 3042 } 3043 ; Loads four <4 x float> rows, transposes them as a 4x4 matrix with two rounds of shufflevector, and stores the rows back in place. 3044 define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind { 3045 ; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3046 ; X86-SSE: # %bb.0: 3047 ; X86-SSE-NEXT: pushl %esi # encoding: [0x56] 3048 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3049 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3050 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3051 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3052 ; X86-SSE-NEXT: movaps (%esi), %xmm0 # encoding: [0x0f,0x28,0x06] 3053 ; X86-SSE-NEXT: movaps (%edx), %xmm1 # encoding: [0x0f,0x28,0x0a] 3054 ; X86-SSE-NEXT: movaps (%ecx), %xmm2 # 
encoding: [0x0f,0x28,0x11] 3055 ; X86-SSE-NEXT: movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18] 3056 ; X86-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3057 ; X86-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3058 ; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3059 ; X86-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3060 ; X86-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3061 ; X86-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3062 ; X86-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3063 ; X86-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3064 ; X86-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3065 ; X86-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3066 ; X86-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3067 ; X86-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3068 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3069 ; X86-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3070 ; X86-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3071 ; X86-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3072 ; X86-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3073 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3074 ; X86-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3075 ; X86-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3076 ; X86-SSE-NEXT: movaps %xmm1, (%esi) # encoding: [0x0f,0x29,0x0e] 3077 ; X86-SSE-NEXT: movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a] 3078 ; X86-SSE-NEXT: movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19] 3079 ; X86-SSE-NEXT: movaps %xmm2, (%eax) # encoding: [0x0f,0x29,0x10] 3080 ; X86-SSE-NEXT: popl %esi # encoding: [0x5e] 3081 ; X86-SSE-NEXT: retl # encoding: [0xc3] 3082 ; 3083 ; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3084 ; X86-AVX1: # %bb.0: 3085 ; X86-AVX1-NEXT: pushl %esi # encoding: [0x56] 3086 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3087 ; X86-AVX1-NEXT: 
movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3088 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3089 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3090 ; X86-AVX1-NEXT: vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06] 3091 ; X86-AVX1-NEXT: vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] 3092 ; X86-AVX1-NEXT: vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11] 3093 ; X86-AVX1-NEXT: vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18] 3094 ; X86-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3095 ; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3096 ; X86-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3097 ; X86-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3098 ; X86-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3099 ; X86-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3100 ; X86-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3101 ; X86-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3102 ; X86-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3103 ; X86-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3104 ; X86-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3105 ; X86-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3106 ; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3107 ; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3108 ; X86-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3109 ; X86-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3110 ; X86-AVX1-NEXT: vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16] 3111 ; X86-AVX1-NEXT: vmovaps %xmm3, (%edx) # encoding: [0xc5,0xf8,0x29,0x1a] 3112 ; X86-AVX1-NEXT: vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21] 3113 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 3114 ; 
X86-AVX1-NEXT: popl %esi # encoding: [0x5e] 3115 ; X86-AVX1-NEXT: retl # encoding: [0xc3] 3116 ; 3117 ; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3118 ; X86-AVX512: # %bb.0: 3119 ; X86-AVX512-NEXT: pushl %esi # encoding: [0x56] 3120 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3121 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3122 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3123 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3124 ; X86-AVX512-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] 3125 ; X86-AVX512-NEXT: vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0a] 3126 ; X86-AVX512-NEXT: vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11] 3127 ; X86-AVX512-NEXT: vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18] 3128 ; X86-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3129 ; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3130 ; X86-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3131 ; X86-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3132 ; X86-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3133 ; X86-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3134 ; X86-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3135 ; X86-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3136 ; X86-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3137 ; X86-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3138 ; X86-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3139 ; X86-AVX512-NEXT: # xmm3 = 
xmm4[1],xmm5[1] 3140 ; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3141 ; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3142 ; X86-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3143 ; X86-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3144 ; X86-AVX512-NEXT: vmovaps %xmm2, (%esi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x16] 3145 ; X86-AVX512-NEXT: vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a] 3146 ; X86-AVX512-NEXT: vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21] 3147 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 3148 ; X86-AVX512-NEXT: popl %esi # encoding: [0x5e] 3149 ; X86-AVX512-NEXT: retl # encoding: [0xc3] 3150 ; 3151 ; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3152 ; X64-SSE: # %bb.0: 3153 ; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 3154 ; X64-SSE-NEXT: movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e] 3155 ; X64-SSE-NEXT: movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12] 3156 ; X64-SSE-NEXT: movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19] 3157 ; X64-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3158 ; X64-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3159 ; X64-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3160 ; X64-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3161 ; X64-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3162 ; X64-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3163 ; X64-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3164 ; X64-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3165 ; X64-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3166 ; X64-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3167 ; X64-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3168 ; X64-SSE-NEXT: movlhps %xmm5, 
%xmm1 # encoding: [0x0f,0x16,0xcd] 3169 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3170 ; X64-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3171 ; X64-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3172 ; X64-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3173 ; X64-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3174 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3175 ; X64-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3176 ; X64-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3177 ; X64-SSE-NEXT: movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f] 3178 ; X64-SSE-NEXT: movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e] 3179 ; X64-SSE-NEXT: movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a] 3180 ; X64-SSE-NEXT: movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11] 3181 ; X64-SSE-NEXT: retq # encoding: [0xc3] 3182 ; 3183 ; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3184 ; X64-AVX1: # %bb.0: 3185 ; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 3186 ; X64-AVX1-NEXT: vmovaps (%rsi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0e] 3187 ; X64-AVX1-NEXT: vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12] 3188 ; X64-AVX1-NEXT: vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19] 3189 ; X64-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3190 ; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3191 ; X64-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3192 ; X64-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3193 ; X64-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3194 ; X64-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3195 ; X64-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3196 ; X64-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3197 ; X64-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3198 ; X64-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3199 ; X64-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, 
%xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3200 ; X64-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3201 ; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3202 ; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3203 ; X64-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3204 ; X64-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3205 ; X64-AVX1-NEXT: vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17] 3206 ; X64-AVX1-NEXT: vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e] 3207 ; X64-AVX1-NEXT: vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22] 3208 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01] 3209 ; X64-AVX1-NEXT: retq # encoding: [0xc3] 3210 ; 3211 ; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3212 ; X64-AVX512: # %bb.0: 3213 ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 3214 ; X64-AVX512-NEXT: vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e] 3215 ; X64-AVX512-NEXT: vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x12] 3216 ; X64-AVX512-NEXT: vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19] 3217 ; X64-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3218 ; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3219 ; X64-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3220 ; X64-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3221 ; X64-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3222 ; X64-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3223 ; X64-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3224 ; X64-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3225 ; X64-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression 
encoding: [0xc5,0xd8,0x16,0xd5] 3226 ; X64-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3227 ; X64-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3228 ; X64-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3229 ; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3230 ; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3231 ; X64-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3232 ; X64-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3233 ; X64-AVX512-NEXT: vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17] 3234 ; X64-AVX512-NEXT: vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e] 3235 ; X64-AVX512-NEXT: vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x22] 3236 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01] 3237 ; X64-AVX512-NEXT: retq # encoding: [0xc3] 3238 %row0 = load <4 x float>, <4 x float>* %a0, align 16 3239 %row1 = load <4 x float>, <4 x float>* %a1, align 16 3240 %row2 = load <4 x float>, <4 x float>* %a2, align 16 3241 %row3 = load <4 x float>, <4 x float>* %a3, align 16 3242 %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3243 %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3244 %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3245 %tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3246 %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3247 %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3248 %res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3249 %res3 = 
shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3250 store <4 x float> %res0, <4 x float>* %a0, align 16 3251 store <4 x float> %res1, <4 x float>* %a1, align 16 3252 store <4 x float> %res2, <4 x float>* %a2, align 16 3253 store <4 x float> %res3, <4 x float>* %a3, align 16 3254 ret void 3255 } 3256
; Returns the i32 result of the llvm.x86.sse.ucomieq.ss intrinsic applied to %a0 and %a1.
3257 define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3258 ; SSE-LABEL: test_mm_ucomieq_ss: 3259 ; SSE: # %bb.0: 3260 ; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3261 ; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3262 ; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3263 ; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3264 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3265 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3266 ; 3267 ; AVX1-LABEL: test_mm_ucomieq_ss: 3268 ; AVX1: # %bb.0: 3269 ; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3270 ; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3271 ; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3272 ; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3273 ; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3274 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3275 ; 3276 ; AVX512-LABEL: test_mm_ucomieq_ss: 3277 ; AVX512: # %bb.0: 3278 ; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3279 ; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3280 ; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3281 ; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3282 ; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3283 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3284 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 3285 ret i32 %res 3286 } 3287 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 3288 ; Returns the i32 result of the llvm.x86.sse.ucomige.ss intrinsic applied to %a0 and %a1. 3289 define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 
3290 ; SSE-LABEL: test_mm_ucomige_ss: 3291 ; SSE: # %bb.0: 3292 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3293 ; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3294 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3295 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3296 ; 3297 ; AVX1-LABEL: test_mm_ucomige_ss: 3298 ; AVX1: # %bb.0: 3299 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3300 ; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3301 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3302 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3303 ; 3304 ; AVX512-LABEL: test_mm_ucomige_ss: 3305 ; AVX512: # %bb.0: 3306 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3307 ; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3308 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3309 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3310 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) 3311 ret i32 %res 3312 } 3313 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 3314 ; Returns the i32 result of the llvm.x86.sse.ucomigt.ss intrinsic applied to %a0 and %a1. 3315 define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3316 ; SSE-LABEL: test_mm_ucomigt_ss: 3317 ; SSE: # %bb.0: 3318 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3319 ; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3320 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3321 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3322 ; 3323 ; AVX1-LABEL: test_mm_ucomigt_ss: 3324 ; AVX1: # %bb.0: 3325 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3326 ; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3327 ; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3328 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3329 ; 3330 ; AVX512-LABEL: test_mm_ucomigt_ss: 3331 ; AVX512: # %bb.0: 3332 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3333 ; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3334 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3335 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3336 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) 3337 ret i32 %res 3338 } 3339 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 3340 ; Returns the i32 result of the llvm.x86.sse.ucomile.ss intrinsic; the expected asm compares with the operands swapped and uses setae. 3341 define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3342 ; SSE-LABEL: test_mm_ucomile_ss: 3343 ; SSE: # %bb.0: 3344 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3345 ; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3346 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3347 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3348 ; 3349 ; AVX1-LABEL: test_mm_ucomile_ss: 3350 ; AVX1: # %bb.0: 3351 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3352 ; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3353 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3354 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3355 ; 3356 ; AVX512-LABEL: test_mm_ucomile_ss: 3357 ; AVX512: # %bb.0: 3358 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3359 ; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3360 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3361 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3362 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) 3363 ret i32 %res 3364 } 3365 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 3366 ; Returns the i32 result of the llvm.x86.sse.ucomilt.ss intrinsic; the expected asm compares with the operands swapped and uses seta. 3367 define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3368 ; SSE-LABEL: test_mm_ucomilt_ss: 3369 ; SSE: # %bb.0: 3370 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3371 ; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3372 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3373 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3374 ; 3375 ; AVX1-LABEL: test_mm_ucomilt_ss: 3376 ; AVX1: # 
%bb.0: 3377 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3378 ; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3379 ; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3380 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3381 ; 3382 ; AVX512-LABEL: test_mm_ucomilt_ss: 3383 ; AVX512: # %bb.0: 3384 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3385 ; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3386 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3387 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3388 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) 3389 ret i32 %res 3390 } 3391 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 3392 ; Returns the i32 result of the llvm.x86.sse.ucomineq.ss intrinsic; the expected asm ORs the parity and not-equal flags. 3393 define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3394 ; SSE-LABEL: test_mm_ucomineq_ss: 3395 ; SSE: # %bb.0: 3396 ; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3397 ; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3398 ; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3399 ; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3400 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3401 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3402 ; 3403 ; AVX1-LABEL: test_mm_ucomineq_ss: 3404 ; AVX1: # %bb.0: 3405 ; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3406 ; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3407 ; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3408 ; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3409 ; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3410 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3411 ; 3412 ; AVX512-LABEL: test_mm_ucomineq_ss: 3413 ; AVX512: # %bb.0: 3414 ; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3415 ; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3416 ; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3417 ; 
AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3418 ; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3419 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3420 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) 3421 ret i32 %res 3422 } 3423 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 3424 ; Returns an undef <4 x float>; the expected asm is a bare return on every configuration. 3425 define <4 x float> @test_mm_undefined_ps() { 3426 ; CHECK-LABEL: test_mm_undefined_ps: 3427 ; CHECK: # %bb.0: 3428 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3429 ret <4 x float> undef 3430 } 3431 ; Interleaves the upper two lanes of %a0 and %a1 (shuffle mask 2,6,3,7). 3432 define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3433 ; SSE-LABEL: test_mm_unpackhi_ps: 3434 ; SSE: # %bb.0: 3435 ; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3436 ; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3437 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3438 ; 3439 ; AVX1-LABEL: test_mm_unpackhi_ps: 3440 ; AVX1: # %bb.0: 3441 ; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3442 ; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3443 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3444 ; 3445 ; AVX512-LABEL: test_mm_unpackhi_ps: 3446 ; AVX512: # %bb.0: 3447 ; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3448 ; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3449 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3450 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3451 ret <4 x float> %res 3452 } 3453 ; Interleaves the lower two lanes of %a0 and %a1 (shuffle mask 0,4,1,5). 3454 define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3455 ; SSE-LABEL: test_mm_unpacklo_ps: 3456 ; SSE: # %bb.0: 3457 ; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 3458 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3459 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3460 ; 3461 ; AVX1-LABEL: test_mm_unpacklo_ps: 3462 ; AVX1: # %bb.0: 3463 ; AVX1-NEXT: 
vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1] 3464 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3465 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3466 ; 3467 ; AVX512-LABEL: test_mm_unpacklo_ps: 3468 ; AVX512: # %bb.0: 3469 ; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 3470 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3471 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3472 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3473 ret <4 x float> %res 3474 } 3475 ; Bitwise XOR of two <4 x float> vectors, done on their <4 x i32> bitcasts and cast back to float. 3476 define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3477 ; SSE-LABEL: test_mm_xor_ps: 3478 ; SSE: # %bb.0: 3479 ; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] 3480 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3481 ; 3482 ; AVX1-LABEL: test_mm_xor_ps: 3483 ; AVX1: # %bb.0: 3484 ; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 3485 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3486 ; 3487 ; AVX512-LABEL: test_mm_xor_ps: 3488 ; AVX512: # %bb.0: 3489 ; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 3490 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3491 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 3492 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 3493 %res = xor <4 x i32> %arg0, %arg1 3494 %bc = bitcast <4 x i32> %res to <4 x float> 3495 ret <4 x float> %bc 3496 } 3497 ; Metadata node referenced as !0 by the !nontemporal store in test_mm_stream_ps. 3498 !0 = !{i32 1} 3499