; NOTE(review): code-browser navigation header ("Home | History | Annotate | Download | only in InstCombine") commented out so this file remains valid LLVM IR.
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt < %s -instcombine -S | FileCheck %s
      3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      4 
      5 ; Verify that instcombine is able to fold identity shuffles.
      6 
; Mask <0,1,2,3> selects each 32-bit element in place, so the intrinsic is a
; no-op; the CHECK lines require the call to fold to a plain `ret %v`.
      7 define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) {
      8 ; CHECK-LABEL: @identity_test_vpermilvar_ps(
      9 ; CHECK-NEXT:    ret <4 x float> %v
     10 ;
     11   %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
     12   ret <4 x float> %a
     13 }
     14 
; 256-bit variant of the identity fold: mask <0..7> leaves every element in
; place, so the whole call must disappear (CHECK: bare `ret %v`).
     15 define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) {
     16 ; CHECK-LABEL: @identity_test_vpermilvar_ps_256(
     17 ; CHECK-NEXT:    ret <8 x float> %v
     18 ;
     19   %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
     20   ret <8 x float> %a
     21 }
     22 
; For the pd form the selector bit is bit 1 of each i64, so mask values 0 and 2
; pick elements 0 and 1 respectively — an identity permute, which must fold to
; `ret %v` (confirmed by the CHECK lines).
     23 define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) {
     24 ; CHECK-LABEL: @identity_test_vpermilvar_pd(
     25 ; CHECK-NEXT:    ret <2 x double> %v
     26 ;
     27   %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>)
     28   ret <2 x double> %a
     29 }
     30 
; 256-bit pd identity: the permute is per 128-bit lane, so <0,2,0,2> selects
; elements <0,1> in the low lane and <2,3> in the high lane — identity overall,
; and the call must fold away (CHECK: bare `ret %v`).
     31 define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) {
     32 ; CHECK-LABEL: @identity_test_vpermilvar_pd_256(
     33 ; CHECK-NEXT:    ret <4 x double> %v
     34 ;
     35   %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>)
     36   ret <4 x double> %a
     37 }
     38 
     39 ; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
     40 ; with a shuffle mask of all zeroes.
     41 
; All-zero mask broadcasts element 0; the constant-mask call must become a
; shufflevector splat (mask `zeroinitializer`), per the CHECK lines.
     42 define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) {
     43 ; CHECK-LABEL: @zero_test_vpermilvar_ps_zero(
     44 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
     45 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
     46 ;
     47   %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
     48   ret <4 x float> %a
     49 }
     50 
; 256-bit zero mask: the permute is per 128-bit lane, so "element 0" means
; element 0 of each lane — the expected shufflevector mask is <0,0,0,0,4,4,4,4>,
; not an all-zero splat across the full vector.
     51 define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) {
     52 ; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero(
     53 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
     54 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
     55 ;
     56   %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
     57   ret <8 x float> %a
     58 }
     59 
; pd zero mask: both selector bits are 0, so both output elements read element 0
; — must fold to a shufflevector splat of element 0.
     60 define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) {
     61 ; CHECK-LABEL: @zero_test_vpermilvar_pd_zero(
     62 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
     63 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
     64 ;
     65   %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
     66   ret <2 x double> %a
     67 }
     68 
; 256-bit pd zero mask: per-lane broadcast of each lane's element 0, hence the
; expected shufflevector mask <0,0,2,2> (element 2 is the high lane's first).
     69 define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) {
     70 ; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero(
     71 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
     72 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
     73 ;
     74   %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
     75   ret <4 x double> %a
     76 }
     77 
     78 ; Verify that instcombine is able to fold constant shuffles.
     79 
; General constant mask: <3,2,1,0> reverses the four floats; the intrinsic must
; be replaced by the equivalent shufflevector with the same mask.
     80 define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
     81 ; CHECK-LABEL: @test_vpermilvar_ps(
     82 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
     83 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
     84 ;
     85   %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 7, i32 6, i32 5, i32 4>)
     86   ret <4 x float> %a
     87 }
     88 
; 256-bit ps: only the low 2 bits of each index select within a 128-bit lane,
; so input mask <7,6,5,4,...> in the low lane is really <3,2,1,0> — the CHECK
; shufflevector mask <3,2,1,0,7,6,5,4> reverses each lane independently.
     89 define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
     90 ; CHECK-LABEL: @test_vpermilvar_ps_256(
     91 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     92 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
     93 ;
     94   %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
     95   ret <8 x float> %a
     96 }
     97 
; pd swap: selector bit is bit 1, so mask <2,0> means <elt1,elt0> — must fold
; to a shufflevector with mask <1,0>.
     98 define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
     99 ; CHECK-LABEL: @test_vpermilvar_pd(
    100 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
    101 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
    102 ;
    103   %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
    104   ret <2 x double> %a
    105 }
    106 
; 256-bit pd: only bit 1 of each i64 is used, per lane. Mask <3,1,2,0> has
; selector bits <1,0,1,0>, giving lane-local picks <1,0> and <3,2> — hence the
; expected shufflevector mask <1,0,3,2> (swap within each lane).
    107 define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
    108 ; CHECK-LABEL: @test_vpermilvar_pd_256(
    109 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
    110 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
    111 ;
    112   %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
    113   ret <4 x double> %a
    114 }
    115 
    116 ; Verify that instcombine is able to fold constant shuffles with undef mask elements.
    117 
; Undef mask elements must be preserved as undef lanes in the resulting
; shufflevector, not treated as element 0.
    118 define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
    119 ; CHECK-LABEL: @undef_test_vpermilvar_ps(
    120 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
    121 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
    122 ;
    123   %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
    124   ret <4 x float> %a
    125 }
    126 
; Mixed undef + out-of-lane indices: undef stays undef; defined indices keep
; only their low 2 bits within each 128-bit lane (6->2, 5->1 in the low lane;
; 3,2,1,0 in the high lane become 7,6,5,4).
    127 define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
    128 ; CHECK-LABEL: @undef_test_vpermilvar_ps_256(
    129 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
    130 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
    131 ;
    132   %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
    133   ret <8 x float> %a
    134 }
    135 
; pd with an undef selector: the undef lane propagates to the shufflevector
; mask; the defined 0 selects element 0 (bit 1 of the i64 is the selector).
    136 define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
    137 ; CHECK-LABEL: @undef_test_vpermilvar_pd(
    138 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
    139 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
    140 ;
    141   %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
    142   ret <2 x double> %a
    143 }
    144 
; 256-bit pd with undefs: defined masks 1 (bit1=0 -> low-lane elt 0) and
; 2 (bit1=1 -> high-lane elt 3) give shufflevector mask <undef,0,3,undef>.
    145 define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) {
    146 ; CHECK-LABEL: @undef_test_vpermilvar_pd_256(
    147 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
    148 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
    149 ;
    150   %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
    151   ret <4 x double> %a
    152 }
    153 
    154 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
    155 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
    156 
    157 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
    158 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
    159