; RUN: opt < %s -instcombine -S | FileCheck %s

declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

; A non-constant immediate byte should never occur in practice; this case only
; makes sure that handling one does not crash. The call is expected to remain.

define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
; CHECK-LABEL: @insertps_non_const_imm
; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
  ret <4 x float> %ins
}

; When every zero mask bit is set the result is a zero vector, no matter what
; the remaining control bits say.

define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
; Immediate 15 (0x0f).
; CHECK-LABEL: @insertps_0x0f
; CHECK-NEXT: ret <4 x float> zeroinitializer
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
; Immediate 255 (0xff).
; CHECK-LABEL: @insertps_0xff
; CHECK-NEXT: ret <4 x float> zeroinitializer
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %ins
}

; When only some zero mask bits are set and they do not override the insertion,
; nothing is changed.

define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
; Immediate 12 (0x0c).
; CHECK-LABEL: @insertps_0x0c
; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %ins
}

; ...unless the same operand is used for both input vectors.
define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
; Immediate 21 (0x15); %v1 is passed as both vector operands.
; CHECK-LABEL: @insertps_0x15_single_input
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %ins
}

; Here the zero mask overrides the lane that would receive the insertion.

define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
; Immediate 26 (0x1a); %v1 is passed as both vector operands.
; CHECK-LABEL: @insertps_0x1a_single_input
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %ins
}

; The zero mask overrides the insertion lane, which means the second input
; vector is not used at all.

define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
; Immediate 193 (0xc1).
; CHECK-LABEL: @insertps_0xc1
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %ins
}

; With no zero mask bits set, the call is converted to a shuffle.
define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
; Immediate 0 (0x00): expected mask places element 0 of %v2 in lane 0.
; CHECK-LABEL: @insertps_0x00
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
; Immediate 16 (0x10): expected mask places element 0 of %v2 in lane 1.
; CHECK-LABEL: @insertps_0x10
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
; Immediate 32 (0x20): expected mask places element 0 of %v2 in lane 2.
; CHECK-LABEL: @insertps_0x20
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
; Immediate 48 (0x30): expected mask places element 0 of %v2 in lane 3.
; CHECK-LABEL: @insertps_0x30
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
; Immediate 192 (0xc0): expected mask places element 3 of %v2 in lane 0.
; CHECK-LABEL: @insertps_0xc0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
; Immediate 208 (0xd0): expected mask places element 3 of %v2 in lane 1.
; CHECK-LABEL: @insertps_0xd0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
; Immediate 224 (0xe0): expected mask places element 3 of %v2 in lane 2.
; CHECK-LABEL: @insertps_0xe0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %ins
}

define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
; Immediate 240 (0xf0): expected mask places element 3 of %v2 in lane 3.
; CHECK-LABEL: @insertps_0xf0
; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float>
  %ins = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %ins
}