; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+pclmul,+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+pclmul,+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX512VL,X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+pclmul,+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+pclmul,+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512VL,X64-AVX512VL

define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd0,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0xd0,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4b,0xc1,0x20]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4a,0xc1,0x20]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xc2,0xc1,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}

define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x00]
; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x01]
; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x02]
; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x03]
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x04]
; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x05]
; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x06]
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x08]
; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x09]
; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0a]
; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0b]
; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0c]
; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0d]
; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0e]
; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0f]
; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x10]
; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x11]
; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x12]
; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x13]
; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x14]
; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x15]
; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x16]
; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x17]
; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x18]
; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x19]
; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1a]
; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1b]
; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1c]
; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1d]
; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1e]
; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x1f]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_cvt_pd2_ps_256:
; AVX: # %bb.0:
; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0 # encoding: [0xc5,0xfd,0x5a,0xc0]
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtpd2ps %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0]
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone


define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_cvt_pd2dq_256:
; AVX: # %bb.0:
; AVX-NEXT: vcvtpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xff,0xe6,0xc0]
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvt_pd2dq_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0]
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone


define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
; AVX: # %bb.0:
; AVX-NEXT: vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvt_ps2dq_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone


define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xfd,0xe6,0xc0]
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone


define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfe,0x5b,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone


define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
; X86-LABEL: test_x86_avx_ldu_dq_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_ldu_dq_256:
; X64: # %bb.0:
; X64-NEXT: vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly


define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx_maskload_pd:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_pd:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly


define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx_maskload_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly


define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx_maskload_ps:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_ps:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly


define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
; X86-LABEL: test_x86_avx_maskload_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly


define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
; X86-LABEL: test_x86_avx_maskstore_pd:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_pd:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind


define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
; X86-LABEL: test_x86_avx_maskstore_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2f,0x0f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind


define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
; X86-LABEL: test_x86_avx_maskstore_ps:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_ps:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind


define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
; X86-LABEL: test_x86_avx_maskstore_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind


define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
; AVX-LABEL: test_x86_avx_max_pd_256:
; AVX: # %bb.0:
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5f,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_max_pd_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5f,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
; AVX-LABEL: test_x86_avx_max_ps_256:
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5f,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_max_ps_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
; AVX-LABEL: test_x86_avx_min_pd_256:
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5d,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_min_pd_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5d,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
; AVX-LABEL: test_x86_avx_min_ps_256:
; AVX: # %bb.0:
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5d,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_min_ps_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovmskpd %ymm0, %eax # encoding: [0xc5,0xfd,0x50,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone


define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovmskps %ymm0, %eax # encoding: [0xc5,0xfc,0x50,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone


define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestc_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestnzc_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestz_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone


define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rcp_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vrcpps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x53,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_round_pd_256:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_round_pd_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vroundpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_round_ps_256:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_round_ps_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vroundps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone

define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_pd:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone


define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_pd_256:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone

define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd $9, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
; AVX-NEXT: # ymm0 = ymm0[1,0,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
; AVX512VL-NEXT: # ymm0 = ymm0[1,0,2,3]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}

define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_ps:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
; X86-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a2 = load <4 x i32>, <4 x i32>* %a1
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_ps_256:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone


define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone

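; As with the ptest tests above, the vtest tests that follow all read EFLAGS
; from a single vtestps/vtestpd: the *c variants consume CF (setb), the *z
; variants consume ZF (sete), and the *nzc variants require CF = 0 and ZF = 0
; (seta).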
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone


define void @test_x86_avx_vzeroall() {
; CHECK-LABEL: test_x86_avx_vzeroall:
; CHECK: # %bb.0:
; CHECK-NEXT: vzeroall # encoding: [0xc5,0xfc,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind


define void @test_x86_avx_vzeroupper() {
; CHECK-LABEL: test_x86_avx_vzeroupper:
; CHECK: # %bb.0:
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind

define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; X86-AVX-LABEL: movnt_dq:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; X86-AVX-NEXT: vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_dq:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; X86-AVX512VL-NEXT: vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_dq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; X64-AVX-NEXT: vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_dq:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; X64-AVX512VL-NEXT: vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind

define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
; X86-AVX-LABEL: movnt_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]
; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_ps:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]
; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07]
; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_ps:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind

define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
; add operation forces the execution domain.
; X86-AVX-LABEL: movnt_pd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_pd:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_pd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX-NEXT: vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07]
; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_pd:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX512VL-NEXT: vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind


; Check for pclmulqdq
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_pclmulqdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x44,0xc1,0x00]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
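; Only immediate 0 (low qword of each source) is exercised here; imm bits 0
; and 4 select which qword halves of the two operands are multiplied.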