; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

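; The RUN lines pass -disable-peephole, which keeps the load-folding peephole
; from merging the load into sqrtsd's memory operand, so a separate
; movapd/vmovapd is expected below.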
define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a0, align 16
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}

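; The legacy *.dq.bs intrinsics take their shift amount in bytes, while the
; plain psll.dq/psrl.dq intrinsics further below take it in bits: i32 7 lowers
; to pslldq/psrldq $7, whereas i32 8 lowers to a one-byte $1 shift.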
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x07]
; SSE-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX1-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX512-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x07]
; SSE-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX1-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX512-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x01]
; SSE-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX1-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX512-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x01]
; SSE-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX1-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX512-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone

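; cvtdq2pd and cvtps2pd convert only the two low source elements, widening
; them to fill the <2 x double> result.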
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvtps2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 ## encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtps2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtps2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

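; storel.dq stores only the low 64 bits of the vector; it is emitted as
; movlps rather than an integer store.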
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; X86-SSE-LABEL: test_x86_sse2_storel_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlps %xmm0, (%eax) ## encoding: [0x0f,0x13,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storel_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storel_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storel_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movlps %xmm0, (%rdi) ## encoding: [0x0f,0x13,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storel_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovlps %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storel_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovlps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind

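; Note: the +1 splat is materialized as pcmpeqd (all-ones, i.e. -1 per byte)
; followed by psubb, since x - (-1) == x + 1.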
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; add operation forces the execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X86-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X86-SSE-NEXT:    movdqu %xmm0, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX1-NEXT:    vmovdqu %xmm0, (%eax) ## encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX512-NEXT:    vmovdqu %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X64-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X64-SSE-NEXT:    movdqu %xmm0, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX1-NEXT:    vmovdqu %xmm0, (%rdi) ## encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX512-NEXT:    vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind

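; The fadd constant is nonzero only in the high lane, so the backends build it
; directly in the upper half of the register: movhpd on SSE/AVX1, and
; vmovsd + vpslldq $8 on AVX512.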
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; fadd operation forces the execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_pd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X86-SSE-NEXT:    movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X86-SSE-NEXT:    movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX1-NEXT:    vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX1-NEXT:    vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd LCPI11_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero
; X86-AVX512-NEXT:    vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X86-AVX512-NEXT:    ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX512-NEXT:    vmovupd %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_pd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X64-SSE-NEXT:    movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X64-SSE-NEXT:    movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX1-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX1-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovsd {{.*}}(%rip), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero
; X64-AVX512-NEXT:    vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X64-AVX512-NEXT:    ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX512-NEXT:    vmovupd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

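; Shuffle immediate 27 is 0b00011011, i.e. element order 3,2,1,0 (a full
; reverse); on AVX targets pshufd is emitted as vpermilps.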
define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; SSE-LABEL: test_x86_sse2_pshuf_d:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufd $27, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshuf_d:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpermilps $27, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshuf_d:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpermilps $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufl_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshuflw $27, %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufl_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufl_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufh_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufhw $27, %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufh_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufh_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone

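; Note: SSE2 only provides unsigned byte (pmaxub/pminub) and signed word
; (pmaxsw/pminsw) integer min/max; the other width/signedness combinations
; were not added until SSE4.1.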
define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: max_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: min_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: max_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: min_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

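; The scalar sd arithmetic intrinsics operate on the low double only, passing
; the upper element of the first operand through; the AVX forms are
; three-operand but otherwise identical here.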
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_add_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_add_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_add_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_sub_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sub_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sub_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_mul_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_mul_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_mul_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_div_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_div_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_div_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

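; pavgb/pavgw compute the rounded unsigned average (a + b + 1) >> 1 per
; element.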
define <16 x i8> @mm_avg_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: mm_avg_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: mm_avg_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: mm_avg_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @mm_avg_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: mm_avg_epu16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: mm_avg_epu16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: mm_avg_epu16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_pmulu_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmuludq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulu_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulu_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

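; On i386 the i32 operand lives on the stack, so cvtsi2sdl takes a memory
; operand; on x86-64 it arrives in %edi and the register form is used.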
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; X86-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtsi2sdl %edi, %xmm0 ## encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtss2sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtss2sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

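; The next two tests differ only in optsize: without it the load stays a
; separate movss, with it the load is folded into cvtss2sd's memory operand
; to save an instruction.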
define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x08]
; X86-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovss (%eax), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x0f]
; X64-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovss (%rdi), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone