; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s

; Codegen smoke tests for the llvm.x86.sse41.* intrinsics. Each function calls
; exactly one intrinsic and FileCheck verifies that the matching instruction
; shows up in the llc output of both RUN configurations.
;
; Check conventions used below:
;  * CHECK-LABEL pins every group of checks to its own function, so a match
;    cannot leak in from a neighbouring function.
;  * Every mnemonic is a substring of its function's symbol name (for example
;    `blendpd` in `test_x86_sse41_blendpd`), so a bare `CHECK: blendpd` can be
;    satisfied by the symbol text alone. The patterns therefore also require
;    an %xmm operand on the same line.
;  * Mnemonics are left unprefixed: FileCheck matches substrings, so the same
;    pattern also accepts a `v`-prefixed spelling of the instruction
;    (presumably what the -mcpu=knl run prints -- TODO confirm).

; blendpd with an immediate selector.
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK: blendpd {{.*}}%xmm
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone


; blendps with an immediate selector.
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blendps:
; CHECK: blendps {{.*}}%xmm
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone


; blendvpd with a vector selector operand (%a2).
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvpd:
; CHECK: blendvpd {{.*}}%xmm
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


; blendvps with a vector selector operand (%a2).
define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvps:
; CHECK: blendvps {{.*}}%xmm
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


; dppd with an immediate control byte.
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_dppd:
; CHECK: dppd {{.*}}%xmm
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone


; Codegen checks for the remaining llvm.x86.sse41.* intrinsics. Each function
; calls one intrinsic; CHECK-LABEL scopes the checks to that function, and
; every mnemonic pattern also requires an %xmm operand on the same line. The
; operand requirement matters because most mnemonics here are substrings of
; the enclosing function's symbol name (e.g. `dpps` in `test_x86_sse41_dpps`,
; `ptest` in `test_x86_sse41_ptestc`), so a bare mnemonic check could be
; satisfied by the symbol text without the instruction being emitted.

; dpps with an immediate control byte.
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_dpps:
; CHECK: dpps {{.*}}%xmm
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone


; insertps with an immediate control byte.
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_insertps:
; CHECK: insertps {{.*}}%xmm
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone



; mpsadbw with an immediate control byte.
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_mpsadbw:
; CHECK: mpsadbw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone


; packusdw: two <4 x i32> inputs, one <8 x i16> result.
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_packusdw:
; CHECK: packusdw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


; pblendvb with a vector selector operand (%a2).
define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse41_pblendvb:
; CHECK: pblendvb {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


; pblendw with an immediate selector.
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblendw:
; CHECK: pblendw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone


; phminposuw: single-operand intrinsic.
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_phminposuw:
; CHECK: phminposuw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


; --- pmax* / pmin* family: two same-typed vector inputs, same-typed result ---

define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsb:
; CHECK: pmaxsb {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsd:
; CHECK: pmaxsd {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxud:
; CHECK: pmaxud {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxuw:
; CHECK: pmaxuw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsb:
; CHECK: pminsb {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsd:
; CHECK: pminsd {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminud:
; CHECK: pminud {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pminuw:
; CHECK: pminuw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone


; --- pmovzx* family: one vector input, wider-element vector result ---

define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbd:
; CHECK: pmovzxbd {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbq:
; CHECK: pmovzxbq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
; CHECK: pmovzxbw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
; CHECK: pmovzxdq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
; CHECK: pmovzxwd {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
; CHECK: pmovzxwq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


; pmuldq: two <4 x i32> inputs, one <2 x i64> result.
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmuldq:
; CHECK: pmuldq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone


; --- ptest* family: the i32 result is produced from the flags set by ptest, so
; --- the checks also pin the instruction(s) expected to materialise it.

; ptestc: expects ptest followed by sbbl.
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestc:
; CHECK: ptest {{.*}}%xmm
; CHECK: sbbl
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone


; ptestnzc: expects ptest, then seta, then movzbl.
define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestnzc:
; CHECK: ptest {{.*}}%xmm
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone


; ptestz: expects ptest, then sete, then movzbl.
define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestz:
; CHECK: ptest {{.*}}%xmm
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone


; --- round.* family: the last operand is an i32 immediate control value ---

define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse41_round_pd:
; CHECK: roundpd {{.*}}%xmm
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse41_round_ps:
; CHECK: roundps {{.*}}%xmm
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_round_sd:
; CHECK: roundsd {{.*}}%xmm
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_round_ss:
; CHECK: roundss {{.*}}%xmm
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone