; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX512VL,X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512VL,X64-AVX512VL

; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.

define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_pd_256:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_ps_256:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX: # %bb.0:
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone

define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX: # %bb.0:
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone

define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX: # %bb.0:
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
  ret <8 x i32> %res
}

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
; not a vinsertf128 $1.
define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; CHECK-NEXT: vblendps $240, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xf0]
; CHECK-NEXT: # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

; We don't check any vextractf128 variant with immediate 0 because that's just a move.

define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_pd_256_1:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_pd_256_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone

define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_ps_256_1:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_ps_256_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone

define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_si_256_1:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_si_256_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vextractf128 $0 which should be optimized away, so just check that it's
; not a vextractf128 of any kind.
define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
  ret <2 x double> %res
}


define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX-NEXT: # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX-NEXT: # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX-NEXT: # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX-NEXT: # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_blend_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $192, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xc0]
; CHECK-NEXT: # ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_blend_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; CHECK-NEXT: # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; AVX-LABEL: test_x86_sse2_psll_dq:
; AVX: # %bb.0:
; AVX-NEXT: vpslldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX-NEXT: # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_psll_dq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX512VL-NEXT: # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; AVX-LABEL: test_x86_sse2_psrl_dq:
; AVX: # %bb.0:
; AVX-NEXT: vpsrldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX-NEXT: # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_psrl_dq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX512VL-NEXT: # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $3, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
; CHECK-NEXT: # xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blendps:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; CHECK-NEXT: # xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblendw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpblendw $7, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
; CHECK-NEXT: # xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbd:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbq:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbw:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxdq:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxdq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxwd:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxwd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxwq:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxwq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbd:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbq:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbw:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxdq:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwd:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwq:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse2_cvtdq2pd:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 # encoding: [0xc5,0xfe,0xe6,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; AVX-LABEL: test_x86_sse2_cvtps2pd:
; AVX: # %bb.0:
; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
; AVX: # %bb.0:
; AVX-NEXT: vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; add operation forces the execution domain.
; X86-AVX-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX-NEXT: vmovdqu %xmm0, (%eax) # encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX512VL-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX-NEXT: vmovdqu %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX512VL-NEXT: vmovdqu %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; fadd operation forces the execution domain.
; X86-AVX-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX-NEXT: vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: # xmm1 = xmm1[0],mem[0]
; X86-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX-NEXT: vmovupd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vmovsd {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: # xmm1 = mem[0],zero
; X86-AVX512VL-NEXT: vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X86-AVX512VL-NEXT: # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-AVX512VL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX512VL-NEXT: vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX-NEXT: vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: # xmm1 = xmm1[0],mem[0]
; X64-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX-NEXT: vmovupd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovsd {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: # xmm1 = mem[0],zero
; X64-AVX512VL-NEXT: vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X64-AVX512VL-NEXT: # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX512VL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX512VL-NEXT: vmovupd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; X86-AVX-LABEL: test_x86_sse_storeu_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse_storeu_ps:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse_storeu_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse_storeu_ps:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
; add operation forces the execution domain.
725 ; X86-AVX-LABEL: test_x86_avx_storeu_dq_256: 726 ; X86-AVX: # %bb.0: 727 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 728 ; X86-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] 729 ; X86-AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2] 730 ; X86-AVX-NEXT: vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca] 731 ; X86-AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2] 732 ; X86-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 733 ; X86-AVX-NEXT: vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00] 734 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 735 ; X86-AVX-NEXT: retl # encoding: [0xc3] 736 ; 737 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_dq_256: 738 ; X86-AVX512VL: # %bb.0: 739 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 740 ; X86-AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9] 741 ; X86-AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1] 742 ; X86-AVX512VL-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 743 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 744 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 745 ; 746 ; X64-AVX-LABEL: test_x86_avx_storeu_dq_256: 747 ; X64-AVX: # %bb.0: 748 ; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] 749 ; X64-AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2] 750 ; X64-AVX-NEXT: vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca] 751 ; X64-AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2] 752 ; X64-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 753 ; X64-AVX-NEXT: vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07] 754 ; X64-AVX-NEXT: vzeroupper # 
encoding: [0xc5,0xf8,0x77] 755 ; X64-AVX-NEXT: retq # encoding: [0xc3] 756 ; 757 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_dq_256: 758 ; X64-AVX512VL: # %bb.0: 759 ; X64-AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9] 760 ; X64-AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1] 761 ; X64-AVX512VL-NEXT: vmovdqu %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07] 762 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 763 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 764 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 765 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) 766 ret void 767 } 768 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind 769 770 771 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { 772 ; add operation forces the execution domain. 
773 ; X86-AVX-LABEL: test_x86_avx_storeu_pd_256: 774 ; X86-AVX: # %bb.0: 775 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 776 ; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9] 777 ; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1] 778 ; X86-AVX-NEXT: vmovupd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x11,0x00] 779 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 780 ; X86-AVX-NEXT: retl # encoding: [0xc3] 781 ; 782 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_pd_256: 783 ; X86-AVX512VL: # %bb.0: 784 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 785 ; X86-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9] 786 ; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] 787 ; X86-AVX512VL-NEXT: vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00] 788 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 789 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 790 ; 791 ; X64-AVX-LABEL: test_x86_avx_storeu_pd_256: 792 ; X64-AVX: # %bb.0: 793 ; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9] 794 ; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1] 795 ; X64-AVX-NEXT: vmovupd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x11,0x07] 796 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 797 ; X64-AVX-NEXT: retq # encoding: [0xc3] 798 ; 799 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_pd_256: 800 ; X64-AVX512VL: # %bb.0: 801 ; X64-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9] 802 ; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] 803 ; X64-AVX512VL-NEXT: vmovupd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x07] 804 ; X64-AVX512VL-NEXT: vzeroupper # encoding: 
[0xc5,0xf8,0x77] 805 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 806 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> 807 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2) 808 ret void 809 } 810 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind 811 812 813 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { 814 ; X86-AVX-LABEL: test_x86_avx_storeu_ps_256: 815 ; X86-AVX: # %bb.0: 816 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 817 ; X86-AVX-NEXT: vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00] 818 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 819 ; X86-AVX-NEXT: retl # encoding: [0xc3] 820 ; 821 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_ps_256: 822 ; X86-AVX512VL: # %bb.0: 823 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 824 ; X86-AVX512VL-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00] 825 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 826 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 827 ; 828 ; X64-AVX-LABEL: test_x86_avx_storeu_ps_256: 829 ; X64-AVX: # %bb.0: 830 ; X64-AVX-NEXT: vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07] 831 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 832 ; X64-AVX-NEXT: retq # encoding: [0xc3] 833 ; 834 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_ps_256: 835 ; X64-AVX512VL: # %bb.0: 836 ; X64-AVX512VL-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07] 837 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 838 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 839 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) 840 ret void 841 } 842 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind 843 844 845 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { 846 ; AVX-LABEL: test_x86_avx_vpermil_pd: 847 ; AVX: # %bb.0: 848 ; AVX-NEXT: 
vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 849 ; AVX-NEXT: # xmm0 = xmm0[1,0] 850 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 851 ; 852 ; AVX512VL-LABEL: test_x86_avx_vpermil_pd: 853 ; AVX512VL: # %bb.0: 854 ; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 855 ; AVX512VL-NEXT: # xmm0 = xmm0[1,0] 856 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 857 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1] 858 ret <2 x double> %res 859 } 860 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone 861 862 863 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { 864 ; AVX-LABEL: test_x86_avx_vpermil_pd_256: 865 ; AVX: # %bb.0: 866 ; AVX-NEXT: vpermilpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07] 867 ; AVX-NEXT: # ymm0 = ymm0[1,1,3,2] 868 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 869 ; 870 ; AVX512VL-LABEL: test_x86_avx_vpermil_pd_256: 871 ; AVX512VL: # %bb.0: 872 ; AVX512VL-NEXT: vpermilpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07] 873 ; AVX512VL-NEXT: # ymm0 = ymm0[1,1,3,2] 874 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 875 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] 876 ret <4 x double> %res 877 } 878 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone 879 880 881 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { 882 ; AVX-LABEL: test_x86_avx_vpermil_ps: 883 ; AVX: # %bb.0: 884 ; AVX-NEXT: vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07] 885 ; AVX-NEXT: # xmm0 = xmm0[3,1,0,0] 886 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 887 ; 888 ; AVX512VL-LABEL: test_x86_avx_vpermil_ps: 889 ; AVX512VL: # %bb.0: 890 ; AVX512VL-NEXT: vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x04,0xc0,0x07] 891 ; AVX512VL-NEXT: # xmm0 = xmm0[3,1,0,0] 892 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 893 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] 894 ret <4 x float> %res 895 } 896 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone 897 898 899 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { 900 ; AVX-LABEL: test_x86_avx_vpermil_ps_256: 901 ; AVX: # %bb.0: 902 ; AVX-NEXT: vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07] 903 ; AVX-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4] 904 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 905 ; 906 ; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256: 907 ; AVX512VL: # %bb.0: 908 ; AVX512VL-NEXT: vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07] 909 ; AVX512VL-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4] 910 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 911 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] 912 ret <8 x float> %res 913 } 914 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone 915 916 917 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { 918 ; AVX-LABEL: test_x86_avx_vperm2f128_pd_256: 919 ; AVX: # %bb.0: 920 ; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 921 ; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 922 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 923 ; 924 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256: 925 ; AVX512VL: # %bb.0: 926 ; AVX512VL-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 927 ; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 928 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 929 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> 
[#uses=1] 930 ret <4 x double> %res 931 } 932 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 933 934 935 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { 936 ; AVX-LABEL: test_x86_avx_vperm2f128_ps_256: 937 ; AVX: # %bb.0: 938 ; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 939 ; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 940 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 941 ; 942 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256: 943 ; AVX512VL: # %bb.0: 944 ; AVX512VL-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 945 ; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 946 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 947 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1] 948 ret <8 x float> %res 949 } 950 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 951 952 953 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { 954 ; AVX-LABEL: test_x86_avx_vperm2f128_si_256: 955 ; AVX: # %bb.0: 956 ; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 957 ; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 958 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 959 ; 960 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256: 961 ; AVX512VL: # %bb.0: 962 ; AVX512VL-NEXT: vperm2i128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x46,0xc0,0x21] 963 ; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 964 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 965 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1] 966 ret <8 x i32> %res 967 } 968 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone 969 970 

; Lowers to a single vcvtdq2ps; under AVX512VL the EVEX form is compressed back to the VEX encoding.
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone