; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s

; Codegen tests for byte/word vector integer arithmetic (add, sub, mul) when
; both AVX512BW and AVX512VL are enabled.  Each operation is exercised at
; 256-bit (ymm) and 128-bit (xmm) width.  The word-add tests additionally cover
; folding a vector load into the instruction's memory operand and merge-masked
; ({%k1}) / zero-masked ({%k1} {z}) forms, where the mask is derived from an
; "icmp ne ..., zeroinitializer" on a third vector argument.
;
; Functions that load through a pointer argument are deliberately not marked
; readnone: a readnone function that reads memory has undefined behavior.

; 256-bit

; Plain register-register byte add.
define <32 x i8> @vpaddb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; Byte add with the second operand loaded from memory; the load is expected to
; fold into the add.
define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, <32 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb256_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %tmp = load <32 x i8>, <32 x i8>* %j, align 4
  %x = add <32 x i8> %i, %tmp
  ret <32 x i8> %x
}

; Plain register-register word add.
define <16 x i16> @vpaddw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; Word add with the second operand loaded from memory.
define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, <16 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw256_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i16>, <16 x i16>* %j, align 4
  %x = add <16 x i16> %i, %tmp
  ret <16 x i16> %x
}

; Merge-masked word add: lanes whose mask element is zero keep the value of %i.
define <16 x i16> @vpaddw256_mask_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw256_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %x = add <16 x i16> %i, %j
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
  ret <16 x i16> %r
}

; Zero-masked word add: lanes whose mask element is zero become zero.
define <16 x i16> @vpaddw256_maskz_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw256_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %x = add <16 x i16> %i, %j
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %r
}

; Merge-masked word add with a folded memory operand.
define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw256_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %j = load <16 x i16>, <16 x i16>* %j.ptr
  %x = add <16 x i16> %i, %j
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
  ret <16 x i16> %r
}

; Zero-masked word add with a folded memory operand.
define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw256_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %j = load <16 x i16>, <16 x i16>* %j.ptr
  %x = add <16 x i16> %i, %j
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %r
}

; Plain register-register byte subtract.
define <32 x i8> @vpsubb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; Plain register-register word subtract.
define <16 x i16> @vpsubw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; Plain register-register word multiply (low half of each product).
define <16 x i16> @vpmullw256_test(<16 x i16> %i, <16 x i16> %j) {
; CHECK-LABEL: vpmullw256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; 128-bit

; Plain register-register byte add.
define <16 x i8> @vpaddb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

; Byte add with the second operand loaded from memory.
define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, <16 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb128_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i8>, <16 x i8>* %j, align 4
  %x = add <16 x i8> %i, %tmp
  ret <16 x i8> %x
}

; Plain register-register word add.
define <8 x i16> @vpaddw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

; Word add with the second operand loaded from memory.
define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, <8 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw128_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i16>, <8 x i16>* %j, align 4
  %x = add <8 x i16> %i, %tmp
  ret <8 x i16> %x
}

; Merge-masked word add: lanes whose mask element is zero keep the value of %i.
define <8 x i16> @vpaddw128_mask_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw128_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %x = add <8 x i16> %i, %j
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
  ret <8 x i16> %r
}

; Zero-masked word add: lanes whose mask element is zero become zero.
define <8 x i16> @vpaddw128_maskz_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw128_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %x = add <8 x i16> %i, %j
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %r
}

; Merge-masked word add with a folded memory operand.
define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw128_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %j = load <8 x i16>, <8 x i16>* %j.ptr
  %x = add <8 x i16> %i, %j
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
  ret <8 x i16> %r
}

; Zero-masked word add with a folded memory operand.
define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw128_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %j = load <8 x i16>, <8 x i16>* %j.ptr
  %x = add <8 x i16> %i, %j
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %r
}

; Plain register-register byte subtract.
define <16 x i8> @vpsubb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

; Plain register-register word subtract.
define <8 x i16> @vpsubw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

; Plain register-register word multiply (low half of each product).
define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) {
; CHECK-LABEL: vpmullw128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}