; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Regression tests for the SSE4.1 intrinsic lowerings on x86-32 and x86-64,
; checking both the emitted assembly and the exact instruction encodings
; across SSE4.1, AVX1 and AVX512VL (EVEX-to-VEX compressed) targets.
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; SSE-LABEL: test_x86_sse41_blendvpd:
; SSE:       ## %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm3 ## encoding: [0x66,0x0f,0x28,0xd8]
; SSE-NEXT:    movaps %xmm2, %xmm0 ## encoding: [0x0f,0x28,0xc2]
; SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 ## encoding: [0x66,0x0f,0x38,0x15,0xd9]
; SSE-NEXT:    movapd %xmm3, %xmm0 ## encoding: [0x66,0x0f,0x28,0xc3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_blendvpd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x4b,0xc1,0x20]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; SSE-LABEL: test_x86_sse41_blendvps:
; SSE:       ## %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm3 ## encoding: [0x0f,0x28,0xd8]
; SSE-NEXT:    movaps %xmm2, %xmm0 ## encoding: [0x0f,0x28,0xc2]
; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 ## encoding: [0x66,0x0f,0x38,0x14,0xd9]
; SSE-NEXT:    movaps %xmm3, %xmm0 ## encoding: [0x0f,0x28,0xc3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_blendvps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x4a,0xc1,0x20]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse41_dppd:
; SSE:       ## %bb.0:
; SSE-NEXT:    dppd $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x41,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dppd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x41,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_dpps:
; SSE:       ## %bb.0:
; SSE-NEXT:    dpps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x40,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dpps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x40,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_insertps:
; SSE:       ## %bb.0:
; SSE-NEXT:    insertps $17, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc1,0x11]
; SSE-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_insertps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX1-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_insertps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX512-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone



define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_mpsadbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_mpsadbw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_packusdw:
; SSE:       ## %bb.0:
; SSE-NEXT:    packusdw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x2b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_packusdw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_packusdw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_packusdw_fold() {
; X86-SSE-LABEL: test_x86_sse41_packusdw_fold:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI7_0, kind: FK_Data_4
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse41_packusdw_fold:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse41_packusdw_fold:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vmovaps LCPI7_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,65535,65535,0,0]
; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse41_packusdw_fold:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI7_0-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse41_packusdw_fold:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI7_0-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse41_packusdw_fold:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovaps {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,65535,65535,0,0]
; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI7_0-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
  ret <8 x i16> %res
}


define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; SSE-LABEL: test_x86_sse41_pblendvb:
; SSE:       ## %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm3 ## encoding: [0x66,0x0f,0x6f,0xd8]
; SSE-NEXT:    movaps %xmm2, %xmm0 ## encoding: [0x0f,0x28,0xc2]
; SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 ## encoding: [0x66,0x0f,0x38,0x10,0xd9]
; SSE-NEXT:    movdqa %xmm3, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_pblendvb:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x4c,0xc1,0x20]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_phminposuw:
; SSE:       ## %bb.0:
; SSE-NEXT:    phminposuw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x41,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_phminposuw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphminposuw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x41,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxsb:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxsb:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxsb:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxsd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxsd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxsd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxud:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxud:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxud:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxuw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxuw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxuw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_pminsb:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x38,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminsb:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminsb:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pminsd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x39,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminsd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminsd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pminud:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminud:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminud:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse41_pminuw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminuw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminuw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone


define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse41_ptestc:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1]
; SSE-NEXT:    setb %al ## encoding: [0x0f,0x92,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_ptestc:
; AVX:       ## %bb.0:
; AVX-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX-NEXT:    vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1]
; AVX-NEXT:    setb %al ## encoding: [0x0f,0x92,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse41_ptestnzc:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1]
; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_ptestnzc:
; AVX:       ## %bb.0:
; AVX-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX-NEXT:    vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1]
; AVX-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse41_ptestz:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1]
; SSE-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_ptestz:
; AVX:       ## %bb.0:
; AVX-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX-NEXT:    vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1]
; AVX-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse41_round_pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundpd $7, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x09,0xc0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundpd $7, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x09,0xc0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x09,0xc0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse41_round_ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundps $7, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x08,0xc0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundps $7, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse41_round_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundsd $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0xc1,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) {
; X86-SSE-LABEL: test_x86_sse41_round_sd_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    roundsd $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x00,0x07]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse41_round_sd_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vroundsd $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse41_round_sd_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vroundsd $7, (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse41_round_sd_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x07,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse41_round_sd_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x07,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse41_round_sd_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0x07,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1b = load <2 x double>, <2 x double>* %a1
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_round_ss:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundss $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0a,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_ss:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0a,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_ss:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0a,0xc1,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone