; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X86 --check-prefix=X86-AVX512
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X64 --check-prefix=X64-AVX512

define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_pblendw:
; X86:       ## %bb.0:
; X86-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendw:
; X64:       ## %bb.0:
; X64-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
; X86-LABEL: test_x86_avx2_movntdqa:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovntdqa (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_movntdqa:
; X64:       ## %bb.0:
; X64-NEXT:    vmovntdqa (%rdi), %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_x86_avx2_mpsadbw:
; X86:       ## %bb.0:
; X86-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_mpsadbw:
; X64:       ## %bb.0:
; X64-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psll_dq_bs:
; X86:       ## %bb.0:
; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psll_dq_bs:
; X64:       ## %bb.0:
; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psrl_dq_bs:
; X86:       ## %bb.0:
; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psrl_dq_bs:
; X64:       ## %bb.0:
; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psll_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psll_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psrl_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psrl_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_vextracti128:
; X86:       ## %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vextracti128:
; X64:       ## %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone


define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
; X86-LABEL: test_x86_avx2_vinserti128:
; X86:       ## %bb.0:
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vinserti128:
; X64:       ## %bb.0:
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone


define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly


define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly


define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly


define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastb_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastb_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly


define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastb_256:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastb_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly


define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastw_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastw_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly


define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastw_256:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastw_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly


define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastq_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastq_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastq_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastq_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly


define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbw:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbw %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxdq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxwd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxwd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxwq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbw:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxdq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxwd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxwq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone

; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; add operation forces the execution domain.
; X86-LABEL: test_x86_avx_storeu_dq_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X86-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovdqu %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx_storeu_dq_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    vmovdqu %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind

define <32 x i8> @mm256_max_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_max_epi8:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi8:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_max_epi16:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_max_epi32:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi32:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_max_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_max_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_max_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_max_epu32:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu32:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_min_epi8:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi8:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_min_epi16:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_min_epi32:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi32:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_min_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_min_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_min_epu32:
; X86:       ## %bb.0:
; X86-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu32:
; X64:       ## %bb.0:
; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_avg_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_avg_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_avg_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_avg_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pabs_b:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsb %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_b:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsb %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone

define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pabs_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsd %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsd %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pabs_w:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsw %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_w:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsw %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_vperm2i128:
; X86:       ## %bb.0:
; X86-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vperm2i128:
; X64:       ## %bb.0:
; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly


define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmulu_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmulu_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmul_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmul_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone