; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

; The next 8 tests check for matching the horizontal op and eliminating the shuffle.
; PR34111 - https://bugs.llvm.org/show_bug.cgi?id=34111

define <4 x float> @hadd_v4f32(<4 x float> %a) {
; SSSE3-LABEL: hadd_v4f32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    haddps %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hadd_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 2>
  %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  %hop = fadd <2 x float> %a02, %a13
  %shuf = shufflevector <2 x float> %hop, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  ret <4 x float> %shuf
}

define <8 x float> @hadd_v8f32a(<8 x float> %a) {
; SSSE3-LABEL: hadd_v8f32a:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movaps %xmm0, %xmm2
; SSSE3-NEXT:    haddps %xmm1, %xmm2
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm2[0,0]
; SSSE3-NEXT:    movaps %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hadd_v8f32a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hadd_v8f32a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-NEXT:    retq
  %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hop = fadd <4 x float> %a0, %a1
  %shuf = shufflevector <4 x float> %hop, <4 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
  ret <8 x float> %shuf
}

define <8 x float> @hadd_v8f32b(<8 x float> %a) {
; SSSE3-LABEL: hadd_v8f32b:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    haddps %xmm0, %xmm0
; SSSE3-NEXT:    haddps %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hadd_v8f32b:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
  %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
  %hop = fadd <8 x float> %a0, %a1
  %shuf = shufflevector <8 x float> %hop, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x float> %shuf
}

define <4 x float> @hsub_v4f32(<4 x float> %a) {
; SSSE3-LABEL: hsub_v4f32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    hsubps %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hsub_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 2>
  %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  %hop = fsub <2 x float> %a02, %a13
  %shuf = shufflevector <2 x float> %hop, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %shuf
}

define <8 x float> @hsub_v8f32a(<8 x float> %a) {
; SSSE3-LABEL: hsub_v8f32a:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movaps %xmm0, %xmm2
; SSSE3-NEXT:    hsubps %xmm1, %xmm2
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm2[0,0]
; SSSE3-NEXT:    movaps %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hsub_v8f32a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hsub_v8f32a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-NEXT:    retq
  %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hop = fsub <4 x float> %a0, %a1
  %shuf = shufflevector <4 x float> %hop, <4 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
  ret <8 x float> %shuf
}

define <8 x float> @hsub_v8f32b(<8 x float> %a) {
; SSSE3-LABEL: hsub_v8f32b:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    hsubps %xmm0, %xmm0
; SSSE3-NEXT:    hsubps %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hsub_v8f32b:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
  %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
  %hop = fsub <8 x float> %a0, %a1
  %shuf = shufflevector <8 x float> %hop, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x float> %shuf
}

define <2 x double> @hadd_v2f64(<2 x double> %a) {
; SSSE3-LABEL: hadd_v2f64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    haddpd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hadd_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %hop = fadd <2 x double> %a0, %a1
  %shuf = shufflevector <2 x double> %hop, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuf
}

define <4 x double> @hadd_v4f64(<4 x double> %a) {
; SSSE3-LABEL: hadd_v4f64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    haddpd %xmm0, %xmm0
; SSSE3-NEXT:    haddpd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hadd_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 2, i32 undef>
  %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
  %hop = fadd <4 x double> %a0, %a1
  %shuf = shufflevector <4 x double> %hop, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuf
}

define <2 x double> @hsub_v2f64(<2 x double> %a) {
; SSSE3-LABEL: hsub_v2f64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    hsubpd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hsub_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %hop = fsub <2 x double> %a0, %a1
  %shuf = shufflevector <2 x double> %hop, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
  ret <2 x double> %shuf
}

define <4 x double> @hsub_v4f64(<4 x double> %a) {
; SSSE3-LABEL: hsub_v4f64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    hsubpd %xmm0, %xmm0
; SSSE3-NEXT:    hsubpd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hsub_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 2, i32 undef>
  %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
  %hop = fsub <4 x double> %a0, %a1
  %shuf = shufflevector <4 x double> %hop, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuf
}

define <4 x i32> @hadd_v4i32(<4 x i32> %a) {
; SSSE3-LABEL: hadd_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hadd_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %hop = add <4 x i32> %a02, %a13
  %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 1>
  ret <4 x i32> %shuf
}

define <8 x i32> @hadd_v8i32a(<8 x i32> %a) {
; SSSE3-LABEL: hadd_v8i32a:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    phaddd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hadd_v8i32a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hadd_v8i32a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-NEXT:    retq
  %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hop = add <4 x i32> %a0, %a1
  %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
  ret <8 x i32> %shuf
}

define <8 x i32> @hadd_v8i32b(<8 x i32> %a) {
; SSSE3-LABEL: hadd_v8i32b:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddd %xmm0, %xmm0
; SSSE3-NEXT:    phaddd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hadd_v8i32b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hadd_v8i32b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
  %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
  %hop = add <8 x i32> %a0, %a1
  %shuf = shufflevector <8 x i32> %hop, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x i32> %shuf
}

define <4 x i32> @hsub_v4i32(<4 x i32> %a) {
; SSSE3-LABEL: hsub_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hsub_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %hop = sub <4 x i32> %a02, %a13
  %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <4 x i32> <i32 undef, i32 1, i32 0, i32 undef>
  ret <4 x i32> %shuf
}

define <8 x i32> @hsub_v8i32a(<8 x i32> %a) {
; SSSE3-LABEL: hsub_v8i32a:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    phsubd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hsub_v8i32a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hsub_v8i32a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-NEXT:    retq
  %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hop = sub <4 x i32> %a0, %a1
  %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
  ret <8 x i32> %shuf
}

define <8 x i32> @hsub_v8i32b(<8 x i32> %a) {
; SSSE3-LABEL: hsub_v8i32b:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubd %xmm0, %xmm0
; SSSE3-NEXT:    phsubd %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hsub_v8i32b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vphsubd %xmm0, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hsub_v8i32b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphsubd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
  %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
  %hop = sub <8 x i32> %a0, %a1
  %shuf = shufflevector <8 x i32> %hop, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x i32> %shuf
}

define <8 x i16> @hadd_v8i16(<8 x i16> %a) {
; SSSE3-LABEL: hadd_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddw %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hadd_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
  %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %hop = add <8 x i16> %a0246, %a1357
  %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %shuf
}

define <16 x i16> @hadd_v16i16a(<16 x i16> %a) {
; SSSE3-LABEL: hadd_v16i16a:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    phaddw %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hadd_v16i16a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hadd_v16i16a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-NEXT:    retq
  %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %hop = add <8 x i16> %a0, %a1
  %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuf
}

define <16 x i16> @hadd_v16i16b(<16 x i16> %a) {
; SSSE3-LABEL: hadd_v16i16b:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phaddw %xmm0, %xmm0
; SSSE3-NEXT:    phaddw %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hadd_v16i16b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hadd_v16i16b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphaddw %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
  %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 11, i32 13, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
  %hop = add <16 x i16> %a0, %a1
  %shuf = shufflevector <16 x i16> %hop, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
  ret <16 x i16> %shuf
}

define <8 x i16> @hsub_v8i16(<8 x i16> %a) {
; SSSE3-LABEL: hsub_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubw %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: hsub_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
  %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %hop = sub <8 x i16> %a0246, %a1357
  %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 1, i32 undef, i32 3>
  ret <8 x i16> %shuf
}

define <16 x i16> @hsub_v16i16a(<16 x i16> %a) {
; SSSE3-LABEL: hsub_v16i16a:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    phsubw %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hsub_v16i16a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hsub_v16i16a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-NEXT:    retq
  %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %hop = sub <8 x i16> %a0, %a1
  %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuf
}

define <16 x i16> @hsub_v16i16b(<16 x i16> %a) {
; SSSE3-LABEL: hsub_v16i16b:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    phsubw %xmm0, %xmm0
; SSSE3-NEXT:    phsubw %xmm1, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: hsub_v16i16b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vphsubw %xmm0, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vphsubw %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: hsub_v16i16b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vphsubw %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
  %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 11, i32 13, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
  %hop = sub <16 x i16> %a0, %a1
  %shuf = shufflevector <16 x i16> %hop, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
  ret <16 x i16> %shuf
}
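
; A minimal illustrative sketch of the pattern the tests above exercise (not one
; of the autogenerated tests, and it carries no FileCheck assertions): split the
; input into its even and odd lanes, combine the two halves, then shuffle the
; result. With SSSE3/AVX this is expected to match a single horizontal add.
; The function name below is hypothetical.
define <4 x float> @hadd_v4f32_sketch(<4 x float> %a) {
  ; Even lanes a[0], a[2] and odd lanes a[1], a[3].
  %evens = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %odds = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  ; Pairwise sums: { a0+a1, a2+a3, undef, undef }.
  %sums = fadd <4 x float> %evens, %odds
  ; Repeat the two sums across the result vector.
  %res = shufflevector <4 x float> %sums, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %res
}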