; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s

; Every function below loads a whole vector through a pointer argument and
; passes the loaded value to a scalar mask3 FMA intrinsic, either as the first
; call operand ("load0" variants) or as the second one ("load1" variants). The
; mask argument is always i8 -1 and the last argument is always i32 4. The
; expected assembly addresses (%rdi) directly from the FMA instruction, so no
; standalone load instruction is expected.
; NOTE(review): -disable-peephole presumably forces the fold to come from
; instruction selection instead of the peephole optimizer -- confirm.
; NOTE(review): i32 4 is presumably the "current rounding direction" encoding
; -- confirm against the intrinsic definition.

; Single-precision scalar intrinsics.
declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

; Double-precision scalar intrinsics.
declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

; vfmadd.ss with the loaded vector as the first intrinsic operand.
define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load0(<4 x float>* %src, <4 x float> %rhs, <4 x float> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = load <4 x float>, <4 x float>* %src
  %fma = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %acc, i8 -1, i32 4)
  ret <4 x float> %fma
}

; vfmadd.ss with the loaded vector as the second intrinsic operand.
define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load1(<4 x float> %lhs, <4 x float>* %src, <4 x float> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %rhs = load <4 x float>, <4 x float>* %src
  %fma = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %acc, i8 -1, i32 4)
  ret <4 x float> %fma
}

; vfmadd.sd with the loaded vector as the first intrinsic operand.
define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load0(<2 x double>* %src, <2 x double> %rhs, <2 x double> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = load <2 x double>, <2 x double>* %src
  %fma = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %lhs, <2 x double> %rhs, <2 x double> %acc, i8 -1, i32 4)
  ret <2 x double> %fma
}

; vfmadd.sd with the loaded vector as the second intrinsic operand.
define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load1(<2 x double> %lhs, <2 x double>* %src, <2 x double> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
  %rhs = load <2 x double>, <2 x double>* %src
  %fma = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %lhs, <2 x double> %rhs, <2 x double> %acc, i8 -1, i32 4)
  ret <2 x double> %fma
}

; vfmsub.ss with the loaded vector as the first intrinsic operand.
define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load0(<4 x float>* %src, <4 x float> %rhs, <4 x float> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = load <4 x float>, <4 x float>* %src
  %fms = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %acc, i8 -1, i32 4)
  ret <4 x float> %fms
}

; vfmsub.ss with the loaded vector as the second intrinsic operand.
define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load1(<4 x float> %lhs, <4 x float>* %src, <4 x float> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %rhs = load <4 x float>, <4 x float>* %src
  %fms = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %acc, i8 -1, i32 4)
  ret <4 x float> %fms
}

; vfmsub.sd with the loaded vector as the first intrinsic operand.
define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load0(<2 x double>* %src, <2 x double> %rhs, <2 x double> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
  %lhs = load <2 x double>, <2 x double>* %src
  %fms = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %lhs, <2 x double> %rhs, <2 x double> %acc, i8 -1, i32 4)
  ret <2 x double> %fms
}

; vfmsub.sd with the loaded vector as the second intrinsic operand.
define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load1(<2 x double> %lhs, <2 x double>* %src, <2 x double> %acc) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
  %rhs = load <2 x double>, <2 x double>* %src
  %fms = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %lhs, <2 x double> %rhs, <2 x double> %acc, i8 -1, i32 4)
  ret <2 x double> %fms
}