; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=avx2 < %s | FileCheck %s
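; Each function below loads the first operand of a blend intrinsic from memory.
; Blend instructions can only fold a load in their second source operand, so the
; CHECK lines expect the backend to commute the two blend operands (inverting the
; immediate mask accordingly) so that the load is folded into the instruction.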

define <8 x i16> @commute_fold_vpblendw_128(<8 x i16> %a, <8 x i16>* %b) #0 {
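  ; imm 17 = 0b00010001: elements 0 and 4 come from %a, the rest from the load.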
  %1 = load <8 x i16>, <8 x i16>* %b
  %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %1, <8 x i16> %a, i8 17)
  ret <8 x i16> %2

  ;CHECK-LABEL: commute_fold_vpblendw_128:
  ;CHECK:      vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2,3],xmm0[4],mem[5,6,7]
  ;CHECK-NEXT: retq
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <16 x i16> @commute_fold_vpblendw_256(<16 x i16> %a, <16 x i16>* %b) #0 {
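  ; The 8-bit immediate is reused for each 128-bit lane of the 256-bit vpblendw,
  ; so imm 17 takes elements 0, 4, 8 and 12 from %a and the rest from the load.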
  %1 = load <16 x i16>, <16 x i16>* %b
  %2 = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %1, <16 x i16> %a, i8 17)
  ret <16 x i16> %2

  ;CHECK-LABEL: commute_fold_vpblendw_256:
  ;CHECK:      vpblendw {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5,6,7],ymm0[8],mem[9,10,11],ymm0[12],mem[13,14,15]
  ;CHECK-NEXT: retq
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpblendd_128(<4 x i32> %a, <4 x i32>* %b) #0 {
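  ; imm 1: only element 0 comes from %a; elements 1-3 come from the load.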
  %1 = load <4 x i32>, <4 x i32>* %b
  %2 = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %1, <4 x i32> %a, i8 1)
  ret <4 x i32> %2

  ;CHECK-LABEL: commute_fold_vpblendd_128:
  ;CHECK:      vpblendd {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
  ;CHECK-NEXT: retq
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <8 x i32> @commute_fold_vpblendd_256(<8 x i32> %a, <8 x i32>* %b) #0 {
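  ; imm 129 = 0b10000001: elements 0 and 7 come from %a, elements 1-6 from the load.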
  %1 = load <8 x i32>, <8 x i32>* %b
  %2 = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %1, <8 x i32> %a, i8 129)
  ret <8 x i32> %2

  ;CHECK-LABEL: commute_fold_vpblendd_256:
  ;CHECK:      vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3,4,5,6],ymm0[7]
  ;CHECK-NEXT: retq
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone

define <4 x float> @commute_fold_vblendps_128(<4 x float> %a, <4 x float>* %b) #0 {
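  ; imm 5 = 0b0101: elements 0 and 2 come from %a, elements 1 and 3 from the load.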
  %1 = load <4 x float>, <4 x float>* %b
  %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 5)
  ret <4 x float> %2

  ;CHECK-LABEL: commute_fold_vblendps_128:
  ;CHECK:      vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3]
  ;CHECK-NEXT: retq
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @commute_fold_vblendps_256(<8 x float> %a, <8 x float>* %b) #0 {
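  ; imm 7 = 0b00000111: elements 0-2 come from %a, elements 3-7 from the load.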
  %1 = load <8 x float>, <8 x float>* %b
  %2 = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %1, <8 x float> %a, i8 7)
  ret <8 x float> %2

  ;CHECK-LABEL: commute_fold_vblendps_256:
  ;CHECK:      vblendps {{.*#+}} ymm0 = ymm0[0,1,2],mem[3,4,5,6,7]
  ;CHECK-NEXT: retq
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <2 x double> @commute_fold_vblendpd_128(<2 x double> %a, <2 x double>* %b) #0 {
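  ; imm 1: element 0 comes from %a, element 1 from the load.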
  %1 = load <2 x double>, <2 x double>* %b
  %2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
  ret <2 x double> %2

  ;CHECK-LABEL: commute_fold_vblendpd_128:
  ;CHECK:      vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
  ;CHECK-NEXT: retq
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x double> @commute_fold_vblendpd_256(<4 x double> %a, <4 x double>* %b) #0 {
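  ; imm 7 = 0b0111: elements 0-2 come from %a, element 3 from the load.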
  %1 = load <4 x double>, <4 x double>* %b
  %2 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %1, <4 x double> %a, i8 7)
  ret <4 x double> %2

  ;CHECK-LABEL: commute_fold_vblendpd_256:
  ;CHECK:      vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],mem[3]
  ;CHECK-NEXT: retq
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone