; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c

define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsb %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsb %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <16 x i8>
  %sub = sub <16 x i8> zeroinitializer, %arg
  %cmp = icmp sgt <16 x i8> %arg, zeroinitializer
  %sel = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub
  %res = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsw %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsw %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <8 x i16>
  %sub = sub <8 x i16> zeroinitializer, %arg
  %cmp = icmp sgt <8 x i16> %arg, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i16> %arg, <8 x i16> %sub
  %res = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <4 x i32>
  %sub = sub <4 x i32> zeroinitializer, %arg
  %cmp = icmp sgt <4 x i32> %arg, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %arg, <4 x i32> %sub
  %res = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_alignr_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_alignr_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test2_mm_alignr_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_mm_alignr_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadd_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadd_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadd_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadds_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadds_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsub_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsub_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsub_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsub_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsubs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsubs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_maddubs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaddubsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_maddubs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhrs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulhrsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mulhrs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_shuffle_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_shuffle_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    psignb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psignw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psignd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone