; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s

; NOTE(review): -march=x86 forces 32-bit code generation despite the x86_64
; triple — the pmovsx tests below check for "retl" accordingly.

; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.

define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone

define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone

define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_1:
; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
  ret <8 x i32> %res
}

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
; not a vinsertf128 $1.
define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
; CHECK-NOT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

; We don't check any vextractf128 variant with immediate 0 because that's just a move.

define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vextractf128_pd_256_1:
; CHECK: vextractf128 $1, %ymm0, %xmm0
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone

define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vextractf128_ps_256_1:
; CHECK: vextractf128 $1, %ymm0, %xmm0
  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone

define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_vextractf128_si_256_1:
; CHECK: vextractf128 $1, %ymm0, %xmm0
  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vextractf128 $0 which should be optimized away, so just check that it's
; not a vextractf128 of any kind.
define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
; CHECK-NOT: vextractf128
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
  ret <2 x double> %res
}


define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_blend_pd_256:
; CHECK: vblendpd
  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_blend_ps_256:
; CHECK: vblendps
  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK: vdpps
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


; NOTE(review): the legacy psll.dq/psrl.dq intrinsics here are called with
; i32 8 and the expected shuffle is a one-*byte* shift (see the asm comments
; in the CHECK lines), i.e. the count operand is interpreted in bits.
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psll_dq:
; CHECK: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrl_dq:
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK: vblendpd
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blendps:
; CHECK: vblendps
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblendw:
; CHECK: vpblendw
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone


; The pmovsx tests below check the complete function body (CHECK-NEXT through
; retl) to ensure the sign-extension lowers to a single instruction.
define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxdq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxwd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxwq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone