; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; Each function below builds two shuffles that pick the two halves of every
; adjacent element pair and then adds or subtracts them. The assertions expect
; a single horizontal add/sub instruction (phadd*/phsub*, or the v-prefixed
; form with +avx) wherever the pairing matches that instruction, and expect
; explicit shuffles plus a plain vector subtract in the *_reverse cases.

; Even elements of x:y plus odd elements of x:y, 8 x i16.
define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phaddw1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddw1:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %r = add <8 x i16> %a, %b
  ret <8 x i16> %r
}

; Same element pairs as phaddw1, but which half of a pair lands in %a versus %b
; alternates, and %b takes its shuffle inputs in (y, x) order. Since add is
; commutative the sums are the same pairs: (1,0), (2,3), (5,4), (6,7), ...
define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phaddw2:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddw2:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14>
  %b = shufflevector <8 x i16> %y, <8 x i16> %x, <8 x i32> <i32 8, i32 11, i32 12, i32 15, i32 0, i32 3, i32 4, i32 7>
  %r = add <8 x i16> %a, %b
  ret <8 x i16> %r
}

; Even elements of x:y plus odd elements of x:y, 4 x i32.
define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phaddd1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddd1:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; 4 x i32 counterpart of phaddw2: halves of each pair are swapped between %a
; and %b in alternating positions, and %b shuffles (y, x) instead of (x, y).
define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phaddd2:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddd2:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %b = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single-input form: the second shuffle operand is undef, so mask indices 4-7
; select undef. Only result element 1 (x[2] + x[3]) is defined.
define <4 x i32> @phaddd3(<4 x i32> %x) {
; SSSE3-LABEL: phaddd3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddd3:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single input, result elements 0 and 1 defined (x[0] + x[1], x[2] + x[3]);
; the upper two mask elements are undef.
define <4 x i32> @phaddd4(<4 x i32> %x) {
; SSSE3-LABEL: phaddd4:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddd4:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Like phaddd4, but result element 1 is written as x[3] + x[2] (the pair's
; halves are swapped between %a and %b).
define <4 x i32> @phaddd5(<4 x i32> %x) {
; SSSE3-LABEL: phaddd5:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddd5:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single input, only result element 0 (x[0] + x[1]) is defined.
define <4 x i32> @phaddd6(<4 x i32> %x) {
; SSSE3-LABEL: phaddd6:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddd6:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single input, only result element 1 is defined, written as x[3] + x[2].
define <4 x i32> @phaddd7(<4 x i32> %x) {
; SSSE3-LABEL: phaddd7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phaddd7:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Even elements of x:y minus odd elements of x:y, 8 x i16.
define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phsubw1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsubw1:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %r = sub <8 x i16> %a, %b
  ret <8 x i16> %r
}

; Even elements of x:y minus odd elements of x:y, 4 x i32.
define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phsubd1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsubd1:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Subtract counterpart of phaddd3: single input, mask indices 4-7 select the
; undef operand, so only result element 1 (x[2] - x[3]) is defined.
define <4 x i32> @phsubd2(<4 x i32> %x) {
; SSSE3-LABEL: phsubd2:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsubd2:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single input, result elements 0 and 1 defined (x[0] - x[1], x[2] - x[3]).
define <4 x i32> @phsubd3(<4 x i32> %x) {
; SSSE3-LABEL: phsubd3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsubd3:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single input, only result element 0 (x[0] - x[1]) is defined.
define <4 x i32> @phsubd4(<4 x i32> %x) {
; SSSE3-LABEL: phsubd4:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsubd4:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Odd elements minus even elements (operands reversed relative to phsubw1).
; Subtraction is not commutative, so the assertions expect byte shuffles and a
; plain (v)psubw here instead of a horizontal subtract.
define <8 x i16> @phsubw1_reverse(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phsubw1_reverse:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pshufb %xmm3, %xmm2
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm3, %xmm1
; SSSE3-NEXT:    pshufb %xmm3, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    psubw %xmm0, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsubw1_reverse:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    vpsubw %xmm0, %xmm2, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %r = sub <8 x i16> %a, %b
  ret <8 x i16> %r
}

; 4 x i32 counterpart of phsubw1_reverse: odd minus even elements. The
; assertions expect two (v)shufps plus a plain (v)psubd, not a horizontal
; subtract.
define <4 x i32> @phsubd1_reverse(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phsubd1_reverse:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movaps %xmm0, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSSE3-NEXT:    psubd %xmm0, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsubd1_reverse:
; AVX:       # %bb.0:
; AVX-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[1,3],xmm1[1,3]
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}