; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; Each test verifies that an SSE1 intrinsic selects the legacy SSE encoding on
; SSE-only targets, the VEX encoding on AVX targets, and that on AVX-512
; targets the EVEX form is shortened back to the VEX prefix (the
; "EVEX TO VEX Compression" comments).

; Square root of all four lanes.
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse_sqrt_ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_sqrt_ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_sqrt_ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


; Square root of the low lane; the upper lanes of %a0 pass through.
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse_sqrt_ss:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_sqrt_ss:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_sqrt_ss:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone


; Unaligned 16-byte store. On x86 the pointer is loaded from the stack;
; on x86-64 it arrives in %rdi.
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_x86_sse_storeu_ps:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movups %xmm0, (%eax) ## encoding: [0x0f,0x11,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse_storeu_ps:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse_storeu_ps:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse_storeu_ps:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movups %xmm0, (%rdi) ## encoding: [0x0f,0x11,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse_storeu_ps:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse_storeu_ps:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


; Scalar add on the low lane.
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_add_ss:
; SSE:       ## %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_add_ss:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_add_ss:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone


; Scalar subtract on the low lane.
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_sub_ss:
; SSE:       ## %bb.0:
; SSE-NEXT:    subss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_sub_ss:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_sub_ss:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsubss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone


; Scalar multiply on the low lane.
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_mul_ss:
; SSE:       ## %bb.0:
; SSE-NEXT:    mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_mul_ss:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_mul_ss:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


; Scalar divide on the low lane.
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_div_ss:
; SSE:       ## %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse_div_ss:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse_div_ss:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vdivss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


; Convert a 32-bit integer to float in the low lane; the integer operand is a
; stack slot on x86 and %edi on x86-64.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0, i32 %a1) {
; X86-SSE-LABEL: test_x86_sse_cvtsi2ss:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse_cvtsi2ss:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse_cvtsi2ss:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse_cvtsi2ss:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtsi2ssl %edi, %xmm0 ## encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse_cvtsi2ss:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse_cvtsi2ss:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone