; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s

; This file checks that the AES-NI / SSE2 intrinsics are lowered to their
; VEX-encoded (v-prefixed) AVX instruction forms when targeting an AVX-capable
; CPU. Each test calls one intrinsic and FileCheck matches the expected mnemonic.

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesdec
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesdeclast
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesenc
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesenclast
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
  ; CHECK: vaesimc
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
  ; CHECK: vaeskeygenassist
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vaddsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: sbbl %eax, %eax
  ; CHECK: andl $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
  ; CHECK: vcvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
  ; CHECK: vcvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
  ; CHECK: vcvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
  ; CHECK: vcvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
  ; CHECK: vcvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
  ; CHECK: vcvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
  ; CHECK: vcvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; CHECK: vcvtsd2ss
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
  ; CHECK: movl
  ; CHECK: vcvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
  ; CHECK: vcvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
  ; CHECK: vcvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
  ; CHECK: vcvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
  ; CHECK: vcvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vdivsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vmaxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vminpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vminsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_mul_sd
  ; CHECK: vmulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpackssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpacksswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpackuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpaddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpaddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpaddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpaddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
  ; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
  ; CHECK: vsqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_storel_dq
  ; CHECK: movl
  ; CHECK: vmovq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind

define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_storeu_dq
  ; CHECK: movl
  ; CHECK: vmovdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_storeu_pd
  ; CHECK: movl
  ; CHECK: vmovupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_sub_sd
  ; CHECK: vsubsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vaddsubpd
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vaddsubps
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vhaddpd
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vhaddps
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vhsubpd
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vhsubps
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vlddqu
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly


define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vblendpd
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vblendps
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vblendvpd
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vblendvps
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vdppd
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vdpps
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vinsertps
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone



define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vmpsadbw
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone


define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpackusdw
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
  ; CHECK: vpblendvb
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpblendw
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone


define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
  ; CHECK: vphminposuw
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpmaxsb
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmaxsd
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmaxud
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaxuw
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpminsb
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpminsd
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32>
@llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone 962 963 964 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { 965 ; CHECK: vpminud 966 %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 967 ret <4 x i32> %res 968 } 969 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone 970 971 972 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { 973 ; CHECK: vpminuw 974 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 975 ret <8 x i16> %res 976 } 977 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone 978 979 980 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { 981 ; CHECK: vpmovsxbd 982 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 983 ret <4 x i32> %res 984 } 985 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone 986 987 988 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { 989 ; CHECK: vpmovsxbq 990 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 991 ret <2 x i64> %res 992 } 993 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone 994 995 996 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { 997 ; CHECK: vpmovsxbw 998 %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 999 ret <8 x i16> %res 1000 } 1001 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone 1002 1003 1004 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { 1005 ; CHECK: vpmovsxdq 1006 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1007 ret <2 x i64> %res 1008 } 1009 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone 1010 1011 1012 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { 1013 ; CHECK: vpmovsxwd 1014 %res = call <4 x i32> 
@llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1015 ret <4 x i32> %res 1016 } 1017 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone 1018 1019 1020 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { 1021 ; CHECK: vpmovsxwq 1022 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 1023 ret <2 x i64> %res 1024 } 1025 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone 1026 1027 1028 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { 1029 ; CHECK: vpmovzxbd 1030 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 1031 ret <4 x i32> %res 1032 } 1033 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone 1034 1035 1036 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { 1037 ; CHECK: vpmovzxbq 1038 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 1039 ret <2 x i64> %res 1040 } 1041 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone 1042 1043 1044 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { 1045 ; CHECK: vpmovzxbw 1046 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 1047 ret <8 x i16> %res 1048 } 1049 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone 1050 1051 1052 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { 1053 ; CHECK: vpmovzxdq 1054 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1055 ret <2 x i64> %res 1056 } 1057 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone 1058 1059 1060 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { 1061 ; CHECK: vpmovzxwd 1062 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1063 ret <4 x i32> %res 1064 } 1065 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone 1066 1067 1068 define <2 x i64> 
@test_x86_sse41_pmovzxwq(<8 x i16> %a0) { 1069 ; CHECK: vpmovzxwq 1070 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 1071 ret <2 x i64> %res 1072 } 1073 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone 1074 1075 1076 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { 1077 ; CHECK: vpmuldq 1078 %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] 1079 ret <2 x i64> %res 1080 } 1081 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone 1082 1083 1084 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { 1085 ; CHECK: vptest 1086 ; CHECK: sbbl 1087 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1088 ret i32 %res 1089 } 1090 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone 1091 1092 1093 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { 1094 ; CHECK: vptest 1095 ; CHECK: seta 1096 ; CHECK: movzbl 1097 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1098 ret i32 %res 1099 } 1100 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 1101 1102 1103 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { 1104 ; CHECK: vptest 1105 ; CHECK: sete 1106 ; CHECK: movzbl 1107 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1108 ret i32 %res 1109 } 1110 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone 1111 1112 1113 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { 1114 ; CHECK: vroundpd 1115 %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] 1116 ret <2 x double> %res 1117 } 1118 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 1119 1120 1121 define <4 x float> @test_x86_sse41_round_ps(<4 x float> 
%a0) { 1122 ; CHECK: vroundps 1123 %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] 1124 ret <4 x float> %res 1125 } 1126 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 1127 1128 1129 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { 1130 ; CHECK: vroundsd 1131 %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] 1132 ret <2 x double> %res 1133 } 1134 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 1135 1136 1137 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { 1138 ; CHECK: vroundss 1139 %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] 1140 ret <4 x float> %res 1141 } 1142 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 1143 1144 1145 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { 1146 ; CHECK: movl $7 1147 ; CHECK: movl $7 1148 ; CHECK: vpcmpestri $7 1149 ; CHECK: movl 1150 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1151 ret i32 %res 1152 } 1153 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1154 1155 1156 define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { 1157 ; CHECK: movl $7 1158 ; CHECK: movl $7 1159 ; CHECK: vpcmpestri $7, ( 1160 ; CHECK: movl 1161 %1 = load <16 x i8>* %a0 1162 %2 = load <16 x i8>* %a2 1163 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1] 1164 ret i32 %res 1165 } 1166 1167 1168 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { 1169 ; CHECK: movl 1170 ; CHECK: movl 1171 ; CHECK: vpcmpestri 1172 ; CHECK: seta 1173 %res = 
call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1174 ret i32 %res 1175 } 1176 declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1177 1178 1179 define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { 1180 ; CHECK: movl 1181 ; CHECK: movl 1182 ; CHECK: vpcmpestri 1183 ; CHECK: sbbl 1184 %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1185 ret i32 %res 1186 } 1187 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1188 1189 1190 define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { 1191 ; CHECK: movl 1192 ; CHECK: movl 1193 ; CHECK: vpcmpestri 1194 ; CHECK: seto 1195 %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1196 ret i32 %res 1197 } 1198 declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1199 1200 1201 define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { 1202 ; CHECK: movl 1203 ; CHECK: movl 1204 ; CHECK: vpcmpestri 1205 ; CHECK: sets 1206 %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1207 ret i32 %res 1208 } 1209 declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1210 1211 1212 define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { 1213 ; CHECK: movl 1214 ; CHECK: movl 1215 ; CHECK: vpcmpestri 1216 ; CHECK: sete 1217 %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1218 ret i32 %res 1219 } 1220 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1221 1222 1223 define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { 1224 ; CHECK: 
movl 1225 ; CHECK: movl 1226 ; CHECK: vpcmpestrm 1227 ; CHECK-NOT: vmov 1228 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1229 ret <16 x i8> %res 1230 } 1231 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1232 1233 1234 define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) { 1235 ; CHECK: movl $7 1236 ; CHECK: movl $7 1237 ; CHECK: vpcmpestrm $7, 1238 ; CHECK-NOT: vmov 1239 %1 = load <16 x i8>* %a2 1240 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1241 ret <16 x i8> %res 1242 } 1243 1244 1245 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { 1246 ; CHECK: vpcmpistri $7 1247 ; CHECK: movl 1248 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1249 ret i32 %res 1250 } 1251 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1252 1253 1254 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { 1255 ; CHECK: vpcmpistri $7, ( 1256 ; CHECK: movl 1257 %1 = load <16 x i8>* %a0 1258 %2 = load <16 x i8>* %a1 1259 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1] 1260 ret i32 %res 1261 } 1262 1263 1264 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { 1265 ; CHECK: vpcmpistri 1266 ; CHECK: seta 1267 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1268 ret i32 %res 1269 } 1270 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1271 1272 1273 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { 1274 ; CHECK: vpcmpistri 1275 ; CHECK: sbbl 1276 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 
1277 ret i32 %res 1278 } 1279 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1280 1281 1282 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { 1283 ; CHECK: vpcmpistri 1284 ; CHECK: seto 1285 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1286 ret i32 %res 1287 } 1288 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1289 1290 1291 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { 1292 ; CHECK: vpcmpistri 1293 ; CHECK: sets 1294 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1295 ret i32 %res 1296 } 1297 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1298 1299 1300 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { 1301 ; CHECK: vpcmpistri 1302 ; CHECK: sete 1303 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1304 ret i32 %res 1305 } 1306 declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1307 1308 1309 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { 1310 ; CHECK: vpcmpistrm $7 1311 ; CHECK-NOT: vmov 1312 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] 1313 ret <16 x i8> %res 1314 } 1315 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1316 1317 1318 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { 1319 ; CHECK: vpcmpistrm $7, ( 1320 ; CHECK-NOT: vmov 1321 %1 = load <16 x i8>* %a1 1322 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] 1323 ret <16 x i8> %res 1324 } 1325 1326 1327 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { 1328 ; CHECK: 
vaddss 1329 %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1330 ret <4 x float> %res 1331 } 1332 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone 1333 1334 1335 define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { 1336 ; CHECK: vcmpordps 1337 %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1338 ret <4 x float> %res 1339 } 1340 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 1341 1342 1343 define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { 1344 ; CHECK: vcmpordss 1345 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1346 ret <4 x float> %res 1347 } 1348 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 1349 1350 1351 define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { 1352 ; CHECK: vcomiss 1353 ; CHECK: sete 1354 ; CHECK: movzbl 1355 %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1356 ret i32 %res 1357 } 1358 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 1359 1360 1361 define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { 1362 ; CHECK: vcomiss 1363 ; CHECK: setae 1364 ; CHECK: movzbl 1365 %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1366 ret i32 %res 1367 } 1368 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone 1369 1370 1371 define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { 1372 ; CHECK: vcomiss 1373 ; CHECK: seta 1374 ; CHECK: movzbl 1375 %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1376 ret i32 %res 1377 } 1378 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x 
float>) nounwind readnone 1379 1380 1381 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { 1382 ; CHECK: vcomiss 1383 ; CHECK: setbe 1384 ; CHECK: movzbl 1385 %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1386 ret i32 %res 1387 } 1388 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone 1389 1390 1391 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { 1392 ; CHECK: vcomiss 1393 ; CHECK: sbb 1394 %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1395 ret i32 %res 1396 } 1397 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone 1398 1399 1400 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { 1401 ; CHECK: vcomiss 1402 ; CHECK: setne 1403 ; CHECK: movzbl 1404 %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1405 ret i32 %res 1406 } 1407 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone 1408 1409 1410 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { 1411 ; CHECK: movl 1412 ; CHECK: vcvtsi2ss 1413 %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] 1414 ret <4 x float> %res 1415 } 1416 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone 1417 1418 1419 define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { 1420 ; CHECK: vcvtss2si 1421 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] 1422 ret i32 %res 1423 } 1424 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 1425 1426 1427 define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { 1428 ; CHECK: vcvttss2si 1429 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] 1430 ret i32 %res 1431 } 1432 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 1433 1434 1435 define <4 x float> 
@test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { 1436 ; CHECK: vdivss 1437 %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1438 ret <4 x float> %res 1439 } 1440 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone 1441 1442 1443 define void @test_x86_sse_ldmxcsr(i8* %a0) { 1444 ; CHECK: movl 1445 ; CHECK: vldmxcsr 1446 call void @llvm.x86.sse.ldmxcsr(i8* %a0) 1447 ret void 1448 } 1449 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind 1450 1451 1452 1453 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { 1454 ; CHECK: vmaxps 1455 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1456 ret <4 x float> %res 1457 } 1458 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1459 1460 1461 define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { 1462 ; CHECK: vmaxss 1463 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1464 ret <4 x float> %res 1465 } 1466 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1467 1468 1469 define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { 1470 ; CHECK: vminps 1471 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1472 ret <4 x float> %res 1473 } 1474 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1475 1476 1477 define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { 1478 ; CHECK: vminss 1479 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1480 ret <4 x float> %res 1481 } 1482 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1483 1484 1485 define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { 1486 ; 
CHECK: vmovmskps 1487 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1] 1488 ret i32 %res 1489 } 1490 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone 1491 1492 1493 1494 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { 1495 ; CHECK: vmulss 1496 %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1497 ret <4 x float> %res 1498 } 1499 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 1500 1501 1502 define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { 1503 ; CHECK: vrcpps 1504 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1505 ret <4 x float> %res 1506 } 1507 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1508 1509 1510 define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { 1511 ; CHECK: vrcpss 1512 %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1513 ret <4 x float> %res 1514 } 1515 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1516 1517 1518 define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { 1519 ; CHECK: vrsqrtps 1520 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1521 ret <4 x float> %res 1522 } 1523 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1524 1525 1526 define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { 1527 ; CHECK: vrsqrtss 1528 %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1529 ret <4 x float> %res 1530 } 1531 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 1532 1533 1534 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { 1535 ; CHECK: vsqrtps 1536 %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1537 ret <4 x float> %res 1538 } 1539 declare <4 x 
float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
  ; CHECK: vsqrtss
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone


define void @test_x86_sse_stmxcsr(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vstmxcsr
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind


define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: movl
  ; CHECK: vmovups
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vsubss
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone


define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
  ; CHECK: vpabsb
  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone


define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
  ; CHECK: vpabsd
  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
  ; CHECK: vpabsw
  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphaddd
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphaddsw
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphaddw
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphsubd
  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphsubsw
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphsubw
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpmaddubsw
  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhrsw
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpshufb
  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone


define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsignb
  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsignd
  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsignw
  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vaddsubpd
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vaddsubps
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vblendpd
  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vblendps
  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vblendvpd
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vblendvps
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vcmpordpd
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vcmpordps
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}

define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vcmpeqps
  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpltps
  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpleps
  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpunordps
  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneqps
  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnltps
  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnleps
  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpordps
  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpeq_uqps
  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpngeps
  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpngtps
  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpfalseps
  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneq_oqps
  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpgeps
  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpgtps
  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmptrueps
  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpeq_osps
  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmplt_oqps
  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmple_oqps
  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpunord_sps
  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneq_usps
  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnlt_uqps
  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnle_uqps
  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpord_sps
  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpeq_usps
  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnge_uqps
  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpngt_uqps
  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpfalse_osps
  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneq_osps
  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpge_oqps
  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpgt_oqps
  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmptrue_usps
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2psy
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone


define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2dqy
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone


define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
  ; CHECK: vcvtps2pd
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvtps2dq
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
  ; CHECK: vcvtdq2pd
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
  ; CHECK: vcvtdq2ps
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone


define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvttpd2dqy
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone


define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvttps2dq
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone


define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vdpps
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhaddpd
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhaddps
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhsubpd
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhsubps
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
  ; CHECK: vlddqu
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly


define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: vmaskmovpd
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly


define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmaskmovpd
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly


define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: vmaskmovps
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly


define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmaskmovps
  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly


define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind


define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind


define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind


define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind


define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vminpd
  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vminps
  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone


define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
  ; CHECK: vmovmskps
  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone


define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone


define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
  ; CHECK: vrcpps
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
  ; CHECK: vroundpd
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
  ; CHECK: vroundps
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vrsqrtps
  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone


define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
  ; CHECK: vmovups
  ; add operation forces the execution domain.
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind


define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmovupd
  ; add operation forces the execution domain.
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind


define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmovups
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind


define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
  ; CHECK: vbroadcastsd
  %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly


define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly


define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
  ; CHECK: vextractf128
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
  ; CHECK: vextractf128
  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone


define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
  ; CHECK: vextractf128
  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
  ; CHECK: vinsertf128
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
  ; CHECK: vinsertf128
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vinsertf128
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vperm2f128
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vperm2f128
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vperm2f128
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
  ; CHECK: vpermilpd
  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
  ; CHECK: vpermilpd
  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
  ; CHECK: vpshufd
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
  ; CHECK: vpermilps
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone


define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
  ; CHECK: vpermilpd
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone


define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
  ; CHECK: vpermilpd
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone


define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
  ; CHECK: vpermilps
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
  ; CHECK: vpermilps
2389 %a2 = load <4 x i32>* %a1 2390 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1] 2391 ret <4 x float> %res 2392 } 2393 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 2394 2395 2396 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { 2397 ; CHECK: vpermilps 2398 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] 2399 ret <8 x float> %res 2400 } 2401 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 2402 2403 2404 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { 2405 ; CHECK: vtestpd 2406 ; CHECK: sbbl 2407 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2408 ret i32 %res 2409 } 2410 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 2411 2412 2413 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { 2414 ; CHECK: vtestpd 2415 ; CHECK: sbbl 2416 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2417 ret i32 %res 2418 } 2419 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 2420 2421 2422 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { 2423 ; CHECK: vtestps 2424 ; CHECK: sbbl 2425 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2426 ret i32 %res 2427 } 2428 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 2429 2430 2431 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { 2432 ; CHECK: vtestps 2433 ; CHECK: sbbl 2434 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2435 ret i32 %res 2436 } 2437 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x 
float>) nounwind readnone 2438 2439 2440 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { 2441 ; CHECK: vtestpd 2442 ; CHECK: seta 2443 ; CHECK: movzbl 2444 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2445 ret i32 %res 2446 } 2447 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone 2448 2449 2450 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { 2451 ; CHECK: vtestpd 2452 ; CHECK: seta 2453 ; CHECK: movzbl 2454 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2455 ret i32 %res 2456 } 2457 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone 2458 2459 2460 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { 2461 ; CHECK: vtestps 2462 ; CHECK: seta 2463 ; CHECK: movzbl 2464 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2465 ret i32 %res 2466 } 2467 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone 2468 2469 2470 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { 2471 ; CHECK: vtestps 2472 ; CHECK: seta 2473 ; CHECK: movzbl 2474 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2475 ret i32 %res 2476 } 2477 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone 2478 2479 2480 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { 2481 ; CHECK: vtestpd 2482 ; CHECK: sete 2483 ; CHECK: movzbl 2484 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2485 ret i32 %res 2486 } 2487 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone 2488 2489 2490 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { 2491 ; CHECK: vtestpd 2492 ; CHECK: sete 
2493 ; CHECK: movzbl 2494 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2495 ret i32 %res 2496 } 2497 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone 2498 2499 2500 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { 2501 ; CHECK: vtestps 2502 ; CHECK: sete 2503 ; CHECK: movzbl 2504 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2505 ret i32 %res 2506 } 2507 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone 2508 2509 2510 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { 2511 ; CHECK: vtestps 2512 ; CHECK: sete 2513 ; CHECK: movzbl 2514 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2515 ret i32 %res 2516 } 2517 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone 2518 2519 2520 define void @test_x86_avx_vzeroall() { 2521 ; CHECK: vzeroall 2522 call void @llvm.x86.avx.vzeroall() 2523 ret void 2524 } 2525 declare void @llvm.x86.avx.vzeroall() nounwind 2526 2527 2528 define void @test_x86_avx_vzeroupper() { 2529 ; CHECK: vzeroupper 2530 call void @llvm.x86.avx.vzeroupper() 2531 ret void 2532 } 2533 declare void @llvm.x86.avx.vzeroupper() nounwind 2534 2535 ; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work 2536 2537 ; CHECK: monitor 2538 define void @monitor(i8* %P, i32 %E, i32 %H) nounwind { 2539 entry: 2540 tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) 2541 ret void 2542 } 2543 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind 2544 2545 ; CHECK: mwait 2546 define void @mwait(i32 %E, i32 %H) nounwind { 2547 entry: 2548 tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) 2549 ret void 2550 } 2551 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind 2552 2553 ; CHECK: sfence 2554 define void @sfence() 
nounwind { 2555 entry: 2556 tail call void @llvm.x86.sse.sfence() 2557 ret void 2558 } 2559 declare void @llvm.x86.sse.sfence() nounwind 2560 2561 ; CHECK: lfence 2562 define void @lfence() nounwind { 2563 entry: 2564 tail call void @llvm.x86.sse2.lfence() 2565 ret void 2566 } 2567 declare void @llvm.x86.sse2.lfence() nounwind 2568 2569 ; CHECK: mfence 2570 define void @mfence() nounwind { 2571 entry: 2572 tail call void @llvm.x86.sse2.mfence() 2573 ret void 2574 } 2575 declare void @llvm.x86.sse2.mfence() nounwind 2576 2577 ; CHECK: clflush 2578 define void @clflush(i8* %p) nounwind { 2579 entry: 2580 tail call void @llvm.x86.sse2.clflush(i8* %p) 2581 ret void 2582 } 2583 declare void @llvm.x86.sse2.clflush(i8*) nounwind 2584 2585 ; CHECK: crc32b 2586 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { 2587 %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) 2588 ret i32 %tmp 2589 } 2590 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind 2591 2592 ; CHECK: crc32w 2593 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { 2594 %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) 2595 ret i32 %tmp 2596 } 2597 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind 2598 2599 ; CHECK: crc32l 2600 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { 2601 %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) 2602 ret i32 %tmp 2603 } 2604 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind 2605 2606 ; CHECK: movntdq 2607 define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind { 2608 %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1> 2609 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind 2610 ret void 2611 } 2612 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind 2613 2614 ; CHECK: movntps 2615 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind { 2616 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind 2617 ret void 2618 } 2619 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x 
float>) nounwind 2620 2621 ; CHECK: movntpd 2622 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind { 2623 ; add operation forces the execution domain. 2624 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> 2625 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind 2626 ret void 2627 } 2628 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind 2629 2630 2631 ; Check for pclmulqdq 2632 define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) { 2633 ; CHECK: vpclmulqdq 2634 %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1] 2635 ret <2 x i64> %res 2636 } 2637 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone 2638