; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s

; Codegen tests for 512-bit vector integer add/sub/mul on byte and word
; elements when AVX-512BW is enabled. Each function holds one IR pattern and
; the expected assembly is the single zmm instruction (plus a vptestmw that
; materializes the mask in the masked variants).

; Plain <64 x i8> add of two register operands.
define <64 x i8> @vpaddb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <64 x i8> %i, %j
  ret <64 x i8> %x
}

; <64 x i8> add whose second operand is loaded from memory. The load is only
; 4-byte aligned and is still expected to be folded into the add.
define <64 x i8> @vpaddb512_fold_test(<64 x i8> %i, <64 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb512_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <64 x i8>, <64 x i8>* %j, align 4
  %x = add <64 x i8> %i, %tmp
  ret <64 x i8> %x
}

; Plain <32 x i16> add of two register operands.
define <32 x i16> @vpaddw512_test(<32 x i16> %i, <32 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <32 x i16> %i, %j
  ret <32 x i16> %x
}

; <32 x i16> add whose second operand is loaded from memory (4-byte aligned
; load, expected to be folded into the add).
define <32 x i16> @vpaddw512_fold_test(<32 x i16> %i, <32 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw512_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <32 x i16>, <32 x i16>* %j, align 4
  %x = add <32 x i16> %i, %tmp
  ret <32 x i16> %x
}

; Merge-masked add: lanes where %mask1 is non-zero take the sum, all other
; lanes keep the value of %i.
define <32 x i16> @vpaddw512_mask_test(<32 x i16> %i, <32 x i16> %j, <32 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw512_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
  ret <32 x i16> %r
}

; Zero-masked add: lanes where %mask1 is non-zero take the sum, all other
; lanes are zero.
define <32 x i16> @vpaddw512_maskz_test(<32 x i16> %i, <32 x i16> %j, <32 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw512_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %r
}

; Merge-masked add with the second operand loaded from memory.
; The function reads through %j.ptr, so it is declared nounwind only (not
; readnone), matching the unmasked fold tests in this file.
define <32 x i16> @vpaddw512_mask_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw512_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %j = load <32 x i16>, <32 x i16>* %j.ptr
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
  ret <32 x i16> %r
}

; Zero-masked add with the second operand loaded from memory.
; The function reads through %j.ptr, so it is declared nounwind only (not
; readnone), matching the unmasked fold tests in this file.
define <32 x i16> @vpaddw512_maskz_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw512_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %j = load <32 x i16>, <32 x i16>* %j.ptr
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %r
}

; Plain <64 x i8> subtract of two register operands.
define <64 x i8> @vpsubb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <64 x i8> %i, %j
  ret <64 x i8> %x
}

; Plain <32 x i16> subtract of two register operands.
define <32 x i16> @vpsubw512_test(<32 x i16> %i, <32 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <32 x i16> %i, %j
  ret <32 x i16> %x
}

; Plain <32 x i16> multiply of two register operands (low 16 bits of each
; product, as produced by the IR mul).
define <32 x i16> @vpmullw512_test(<32 x i16> %i, <32 x i16> %j) {
; CHECK-LABEL: vpmullw512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <32 x i16> %i, %j
  ret <32 x i16> %x
}