Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s
      2 
      3 ; Verify that the backend correctly combines AVX2 builtin intrinsics.
      4 
      5 
      6 define <8 x i32> @test_psra_1(<8 x i32> %A) {
      7   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 3)
      8   %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
      9   %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 2)
     10   ret <8 x i32> %3
     11 }
     12 ; CHECK-LABEL: test_psra_1
     13 ; CHECK: vpsrad $8, %ymm0, %ymm0
     14 ; CHECK-NEXT: ret
     15 
     16 define <16 x i16> @test_psra_2(<16 x i16> %A) {
     17   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 3)
     18   %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
     19   %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 2)
     20   ret <16 x i16> %3
     21 }
     22 ; CHECK-LABEL: test_psra_2
     23 ; CHECK: vpsraw $8, %ymm0, %ymm0
     24 ; CHECK-NEXT: ret
     25 
     26 define <16 x i16> @test_psra_3(<16 x i16> %A) {
     27   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
     28   %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
     29   %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
     30   ret <16 x i16> %3
     31 }
     32 ; CHECK-LABEL: test_psra_3
     33 ; CHECK-NOT: vpsraw
     34 ; CHECK: ret
     35 
     36 define <8 x i32> @test_psra_4(<8 x i32> %A) {
     37   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
     38   %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
     39   %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
     40   ret <8 x i32> %3
     41 }
     42 ; CHECK-LABEL: test_psra_4
     43 ; CHECK-NOT: vpsrad
     44 ; CHECK: ret
     45 
     46 
     47 define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
     48   %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
     49   ret <32 x i8> %res
     50 }
     51 ; CHECK-LABEL: test_x86_avx2_pblendvb
     52 ; CHECK-NOT: vpblendvb
     53 ; CHECK: ret
     54 
     55 
     56 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
     57   %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
     58   ret <16 x i16> %res
     59 }
     60 ; CHECK-LABEL: test_x86_avx2_pblendw
     61 ; CHECK-NOT: vpblendw
     62 ; CHECK: ret
     63 
     64 
     65 define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0) {
     66   %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a0, i32 7)
     67   ret <4 x i32> %res
     68 }
     69 ; CHECK-LABEL: test_x86_avx2_pblendd_128
     70 ; CHECK-NOT: vpblendd
     71 ; CHECK: ret
     72 
     73 
     74 define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0) {
     75   %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a0, i32 7)
     76   ret <8 x i32> %res
     77 }
     78 ; CHECK-LABEL: test_x86_avx2_pblendd_256
     79 ; CHECK-NOT: vpblendd
     80 ; CHECK: ret
     81 
     82 
     83 define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
     84   %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
     85   ret <32 x i8> %res
     86 }
     87 ; CHECK-LABEL: test2_x86_avx2_pblendvb
     88 ; CHECK-NOT: vpblendvb
     89 ; CHECK: ret
     90 
     91 
     92 define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
     93   %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
     94   ret <16 x i16> %res
     95 }
     96 ; CHECK-LABEL: test2_x86_avx2_pblendw
     97 ; CHECK-NOT: vpblendw
     98 ; CHECK: ret
     99 
    100 
    101 define <4 x i32> @test2_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
    102   %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 0)
    103   ret <4 x i32> %res
    104 }
    105 ; CHECK-LABEL: test2_x86_avx2_pblendd_128
    106 ; CHECK-NOT: vpblendd
    107 ; CHECK: ret
    108 
    109 
    110 define <8 x i32> @test2_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
    111   %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 0)
    112   ret <8 x i32> %res
    113 }
    114 ; CHECK-LABEL: test2_x86_avx2_pblendd_256
    115 ; CHECK-NOT: vpblendd
    116 ; CHECK: ret
    117 
    118 
    119 define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
    120   %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
    121   %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
    122   ret <32 x i8> %res
    123 }
    124 ; CHECK-LABEL: test3_x86_avx2_pblendvb
    125 ; CHECK-NOT: vpblendvb
    126 ; CHECK: ret
    127 
    128 
    129 define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
    130   %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
    131   ret <16 x i16> %res
    132 }
    133 ; CHECK-LABEL: test3_x86_avx2_pblendw
    134 ; CHECK-NOT: vpblendw
    135 ; CHECK: ret
    136 
    137 
    138 define <4 x i32> @test3_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
    139   %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 -1)
    140   ret <4 x i32> %res
    141 }
    142 ; CHECK-LABEL: test3_x86_avx2_pblendd_128
    143 ; CHECK-NOT: vpblendd
    144 ; CHECK: ret
    145 
    146 
    147 define <8 x i32> @test3_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
    148   %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 -1)
    149   ret <8 x i32> %res
    150 }
    151 ; CHECK-LABEL: test3_x86_avx2_pblendd_256
    152 ; CHECK-NOT: vpblendd
    153 ; CHECK: ret
    154 
    155 
    156 declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
    157 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
    158 declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
    159 declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
    160 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>)
    161 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32)
    162 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>)
    163 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32)
    164 
    165