; InstCombine tests for the x86 SSE4.1 insertps intrinsic (@llvm.x86.sse41.insertps).
      1 ; RUN: opt < %s -instcombine -S | FileCheck %s
      2 
      3 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
      4 
      5 ; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
      6 
      7 define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
      8   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
      9   ret <4 x float> %res
     10 
     11 ; CHECK-LABEL: @insertps_non_const_imm
     12 ; CHECK-NEXT:  call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
     13 ; CHECK-NEXT:  ret <4 x float>
     14 }
     15 
     16 ; If all zero mask bits are set, return a zero regardless of the other control bits.
     17 
     18 define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
     19   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
     20   ret <4 x float> %res
     21 
     22 ; CHECK-LABEL: @insertps_0x0f
     23 ; CHECK-NEXT:  ret <4 x float> zeroinitializer
     24 }
     25 define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
     26   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
     27   ret <4 x float> %res
     28 
     29 ; CHECK-LABEL: @insertps_0xff
     30 ; CHECK-NEXT:  ret <4 x float> zeroinitializer
     31 }
     32 
     33 ; If some zero mask bits are set that do not override the insertion, we do not change anything.
     34 
     35 define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
     36   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
     37   ret <4 x float> %res
     38 
     39 ; CHECK-LABEL: @insertps_0x0c
     40 ; CHECK-NEXT:  call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
     41 ; CHECK-NEXT:  ret <4 x float>
     42 }
     43 
     44 ; ...unless both input vectors are the same operand.
     45 
     46 define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
     47   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
     48   ret <4 x float> %res
     49 
     50 ; CHECK-LABEL: @insertps_0x15_single_input
     51 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
     52 ; CHECK-NEXT:  ret <4 x float>
     53 }
     54 
     55 ; The zero mask overrides the insertion lane.
     56 
     57 define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
     58   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
     59   ret <4 x float> %res
     60 
     61 ; CHECK-LABEL: @insertps_0x1a_single_input
     62 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
     63 ; CHECK-NEXT:  ret <4 x float>
     64 }
     65 
     66 ; The zero mask overrides the insertion lane, so the second input vector is not used.
     67 
     68 define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
     69   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
     70   ret <4 x float> %res
     71 
     72 ; CHECK-LABEL: @insertps_0xc1
     73 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
     74 ; CHECK-NEXT:  ret <4 x float>
     75 }
     76 
     77 ; If no zero mask bits are set, convert to a shuffle.
     78 
     79 define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
     80   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
     81   ret <4 x float> %res
     82 
     83 ; CHECK-LABEL: @insertps_0x00
     84 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
     85 ; CHECK-NEXT:  ret <4 x float>
     86 }
     87 
     88 define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
     89   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
     90   ret <4 x float> %res
     91 
     92 ; CHECK-LABEL: @insertps_0x10
     93 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
     94 ; CHECK-NEXT:  ret <4 x float>
     95 }
     96 
     97 define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
     98   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
     99   ret <4 x float> %res
    100 
    101 ; CHECK-LABEL: @insertps_0x20
    102 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
    103 ; CHECK-NEXT:  ret <4 x float>
    104 }
    105 
    106 define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
    107   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
    108   ret <4 x float> %res
    109 
    110 ; CHECK-LABEL: @insertps_0x30
    111 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
    112 ; CHECK-NEXT:  ret <4 x float>
    113 }
    114 
    115 define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
    116   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
    117   ret <4 x float> %res
    118 
    119 ; CHECK-LABEL: @insertps_0xc0
    120 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
    121 ; CHECK-NEXT:  ret <4 x float>
    122 }
    123 
    124 define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
    125   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
    126   ret <4 x float> %res
    127 
    128 ; CHECK-LABEL: @insertps_0xd0
    129 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
    130 ; CHECK-NEXT:  ret <4 x float>
    131 }
    132 
    133 define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
    134   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
    135   ret <4 x float> %res
    136 
    137 ; CHECK-LABEL: @insertps_0xe0
    138 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
    139 ; CHECK-NEXT:  ret <4 x float>
    140 }
    141 
    142 define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
    143   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
    144   ret <4 x float> %res
    145 
    146 ; CHECK-LABEL: @insertps_0xf0
    147 ; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
    148 ; CHECK-NEXT:  ret <4 x float>
    149 }
    150 
    151