; RUN: llc < %s -march=x86-64 -mattr=+ssse3,-avx | FileCheck %s -check-prefix=SSSE3
; RUN: llc < %s -march=x86-64 -mattr=-ssse3,+avx | FileCheck %s -check-prefix=AVX

; Every function below adds or subtracts two shuffles that pick neighbouring
; lanes of the inputs, and the checks require a single horizontal add/sub
; instruction: the plain mnemonic for the first command line above, and the
; v-prefixed (VEX) mnemonic for the second.
;
; The plain-mnemonic checks are anchored to the start of the instruction line.
; An unanchored pattern would also match inside the v-prefixed mnemonic, and
; the NOT directive in front of it could not catch that, because a NOT
; directive only scans the text that precedes the following positive match.

; Even lanes plus odd lanes of the concatenation of %x and %y.
; SSSE3-LABEL: phaddw1:
; SSSE3-NOT: vphaddw
; SSSE3: {{^[[:space:]]*}}phaddw
; AVX-LABEL: phaddw1:
; AVX: vphaddw
define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %r = add <8 x i16> %a, %b
  ret <8 x i16> %r
}

; Same lane pairs as phaddw1, but the two members of a pair alternate between
; %a and %b, and %b's shuffle takes its operands in the order (%y, %x).
; SSSE3-LABEL: phaddw2:
; SSSE3-NOT: vphaddw
; SSSE3: {{^[[:space:]]*}}phaddw
; AVX-LABEL: phaddw2:
; AVX: vphaddw
define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14>
  %b = shufflevector <8 x i16> %y, <8 x i16> %x, <8 x i32> <i32 8, i32 11, i32 12, i32 15, i32 0, i32 3, i32 4, i32 7>
  %r = add <8 x i16> %a, %b
  ret <8 x i16> %r
}

; 32-bit counterpart of phaddw1.
; SSSE3-LABEL: phaddd1:
; SSSE3-NOT: vphaddd
; SSSE3: {{^[[:space:]]*}}phaddd
; AVX-LABEL: phaddd1:
; AVX: vphaddd
define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; 32-bit counterpart of phaddw2.
; SSSE3-LABEL: phaddd2:
; SSSE3-NOT: vphaddd
; SSSE3: {{^[[:space:]]*}}phaddd
; AVX-LABEL: phaddd2:
; AVX: vphaddd
define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %b = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single input: the second shuffle operand is undef, so mask indices 4-7 pick
; undef elements, and result lane 0 is undef in both shuffles.
; SSSE3-LABEL: phaddd3:
; SSSE3-NOT: vphaddd
; SSSE3: {{^[[:space:]]*}}phaddd
; AVX-LABEL: phaddd3:
; AVX: vphaddd
define <4 x i32> @phaddd3(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single input with only the two low result lanes defined.
; SSSE3-LABEL: phaddd4:
; SSSE3-NOT: vphaddd
; SSSE3: {{^[[:space:]]*}}phaddd
; AVX-LABEL: phaddd4:
; AVX: vphaddd
define <4 x i32> @phaddd4(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Like phaddd4, but the second pair is taken in the order (3, 2).
; SSSE3-LABEL: phaddd5:
; SSSE3-NOT: vphaddd
; SSSE3: {{^[[:space:]]*}}phaddd
; AVX-LABEL: phaddd5:
; AVX: vphaddd
define <4 x i32> @phaddd5(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Only result lane 0 is defined (element 0 plus element 1 of %x).
; SSSE3-LABEL: phaddd6:
; SSSE3-NOT: vphaddd
; SSSE3: {{^[[:space:]]*}}phaddd
; AVX-LABEL: phaddd6:
; AVX: vphaddd
define <4 x i32> @phaddd6(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Only result lane 1 is defined (element 3 plus element 2 of %x).
; SSSE3-LABEL: phaddd7:
; SSSE3-NOT: vphaddd
; SSSE3: {{^[[:space:]]*}}phaddd
; AVX-LABEL: phaddd7:
; AVX: vphaddd
define <4 x i32> @phaddd7(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Even lanes minus odd lanes of the concatenation of %x and %y.
; SSSE3-LABEL: phsubw1:
; SSSE3-NOT: vphsubw
; SSSE3: {{^[[:space:]]*}}phsubw
; AVX-LABEL: phsubw1:
; AVX: vphsubw
define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %r = sub <8 x i16> %a, %b
  ret <8 x i16> %r
}

; 32-bit counterpart of phsubw1.
; SSSE3-LABEL: phsubd1:
; SSSE3-NOT: vphsubd
; SSSE3: {{^[[:space:]]*}}phsubd
; AVX-LABEL: phsubd1:
; AVX: vphsubd
define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Subtract version of phaddd3: undef second operand, result lane 0 undef.
; SSSE3-LABEL: phsubd2:
; SSSE3-NOT: vphsubd
; SSSE3: {{^[[:space:]]*}}phsubd
; AVX-LABEL: phsubd2:
; AVX: vphsubd
define <4 x i32> @phsubd2(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Subtract version of phaddd4: only the two low result lanes are defined.
; SSSE3-LABEL: phsubd3:
; SSSE3-NOT: vphsubd
; SSSE3: {{^[[:space:]]*}}phsubd
; AVX-LABEL: phsubd3:
; AVX: vphsubd
define <4 x i32> @phsubd3(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Subtract version of phaddd6: only result lane 0 is defined.
; SSSE3-LABEL: phsubd4:
; SSSE3-NOT: vphsubd
; SSSE3: {{^[[:space:]]*}}phsubd
; AVX-LABEL: phsubd4:
; AVX: vphsubd
define <4 x i32> @phsubd4(<4 x i32> %x) {
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}