; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefix=X64

; Codegen and MC-encoding expectations for the 512-bit mask/maskz forms of the
; vpdpbusd, vpdpbusds, vpdpwssd and vpdpwssds intrinsics.
;
; Every test function below has the same shape:
;   * load the third vector operand from %x2p and call the "mask" intrinsic
;     with the run-time mask %x3,
;   * call the "mask" intrinsic again on the register operand %x4 with an
;     all-ones mask (i16 -1),
;   * call the "maskz" intrinsic on %x4 with the run-time mask %x3,
;   * add the three results together and return the sum.
;
; Only per-target prefixes are used: the expected output of every function
; differs between the two targets (mask/pointer argument handling and the
; return instruction), so there are no expectations shared by both runs.

declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; vpdpbusd: the expectations cover the masked form with a folded memory
; operand, the register form without a mask register (all-ones mask), and the
; zero-masking register form.
define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT:    vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18]
; X86-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X86-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xe2]
; X86-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X64-NEXT:    vpdpbusd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x1f]
; X64-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X64-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xe2]
; X64-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  ; Third operand comes from memory so the load can be folded into the instruction.
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  ; Masked call, memory operand, run-time mask.
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ; Masked call, register operand, all-ones mask.
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  ; Zero-masked call, register operand, run-time mask.
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
  ; Combine all three results so none of the calls is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; vpdpbusds: same three call forms as the vpdpbusd test above; the expected
; encodings differ only in the opcode byte (0x51).
define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT:    vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18]
; X86-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X86-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xe2]
; X86-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X64-NEXT:    vpdpbusds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x1f]
; X64-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X64-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xe2]
; X64-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  ; Third operand comes from memory so the load can be folded into the instruction.
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  ; Masked call, memory operand, run-time mask.
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ; Masked call, register operand, all-ones mask.
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  ; Zero-masked call, register operand, run-time mask.
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
  ; Combine all three results so none of the calls is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; vpdpwssd: same three call forms as the tests above; the expected encodings
; use opcode byte 0x52.
define <16 x i32>@test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT:    vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18]
; X86-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X86-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xe2]
; X86-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X64-NEXT:    vpdpwssd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x1f]
; X64-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X64-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xe2]
; X64-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  ; Third operand comes from memory so the load can be folded into the instruction.
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  ; Masked call, memory operand, run-time mask.
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ; Masked call, register operand, all-ones mask.
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  ; Zero-masked call, register operand, run-time mask.
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
  ; Combine all three results so none of the calls is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; vpdpwssds: same three call forms as the tests above; the expected encodings
; use opcode byte 0x53.
define <16 x i32>@test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT:    vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18]
; X86-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X86-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xe2]
; X86-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X64-NEXT:    vpdpwssds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x1f]
; X64-NEXT:    vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
; X64-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xe2]
; X64-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  ; Third operand comes from memory so the load can be folded into the instruction.
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  ; Masked call, memory operand, run-time mask.
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ; Masked call, register operand, all-ones mask.
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  ; Zero-masked call, register operand, run-time mask.
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
  ; Combine all three results so none of the calls is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}
