; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s

; Vector selects whose condition is a compile-time constant mask.
; Each function below states, through its directives, which instruction the
; generated code for that function is expected to contain before returning.

; <4 x float>, mask <1,0,1,1>: lane 1 is taken from %v2, the rest from %v1.
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: vsel_float:
; CHECK: blendps
; CHECK: ret
  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %res
}

; <4 x i8>, mask <1,1,0,1>: lane 2 is taken from %v2, the rest from %v1.
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; CHECK-LABEL: vsel_4xi8:
; CHECK: blendps
; CHECK: ret
  %res = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %res
}

; <4 x i16>, mask <1,0,1,1>: lane 1 is taken from %v2, the rest from %v1.
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; CHECK-LABEL: vsel_4xi16:
; CHECK: blendps
; CHECK: ret
  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %res
}

; <4 x i32>, mask <1,1,0,1>: lane 2 is taken from %v2, the rest from %v1.
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: vsel_i32:
; CHECK: blendps
; CHECK: ret
  %res = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %res
}

; <4 x double>, mask <1,0,0,0>: only lane 0 is taken from %v1.
define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
; CHECK-LABEL: vsel_double:
; CHECK: movsd
; CHECK: ret
  %res = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %res
}

; <4 x i64>, mask <1,0,0,0>: only lane 0 is taken from %v1.
define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
; CHECK-LABEL: vsel_i64:
; CHECK: movsd
; CHECK: ret
  %res = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v1, <4 x i64> %v2
  ret <4 x i64> %res
}

; <16 x i8>, mask repeating <1,0,0,0>: every fourth byte is taken from %v1.
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: vsel_i8:
; CHECK: pblendvb
; CHECK: ret
  %res = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %res
}

;; TEST blend + compares
; The select condition comes from a vector fcmp, so the mask is only known at
; run time and a variable blend is expected after the compare.
; CHECK-LABEL: A:
define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
  ; CHECK: cmplepd
  ; CHECK: blendvpd
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

; CHECK-LABEL: B:
define <2 x double> @B(<2 x double> %x, <2 x double> %y) {
  ; CHECK: cmpnlepd
  ; CHECK: blendvpd
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; Select on undef operands feeding an extractelement and a store. Only the
; function label is checked; presumably this is a compile-only crash
; regression test (inferred from the name -- confirm against history).
; CHECK-LABEL: float_crash:
define void @float_crash() nounwind {
entry:
  %merge205vector_func.i = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
  %extract214vector_func.i = extractelement <4 x double> %merge205vector_func.i, i32 0
  store double %extract214vector_func.i, double addrspace(1)* undef, align 8
  ret void
}

; If we can figure out a blend has a constant mask, we should emit the
; blend instruction with an immediate mask
define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
; In this case, we emit a simple movsd
; CHECK-LABEL: constant_blendvpd:
; CHECK: movsd
; CHECK: ret
  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %xy, <2 x double> %ab
  ret <2 x double> %1
}

define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
; Mask <0,0,0,1>: lanes 0-2 come from %abcd, hence immediate 0b0111 = 7.
; No register move is allowed between the label and the blend.
; CHECK-LABEL: constant_blendvps:
; CHECK-NOT: mov
; CHECK: blendps $7
; CHECK: ret
  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %xyzw, <4 x float> %abcd
  ret <4 x float> %1
}

define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
; Byte-granular constant mask: a movaps followed by pblendvb is expected.
; CHECK-LABEL: constant_pblendvb:
; CHECK: movaps
; CHECK: pblendvb
; CHECK: ret
  %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %xyzw, <16 x i8> %abcd
  ret <16 x i8> %1
}

; Intrinsic declarations; none of the functions shown in this file call them.
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)

;; 2 tests for shufflevectors that optimize to blend + immediate
; Shuffle indices 5 and 6 pick lanes 1 and 2 of %b: immediate 0b0110 = 6.
; CHECK-LABEL: @blend_shufflevector_4xfloat
; CHECK: blendps $6, %xmm1, %xmm0
; CHECK: ret
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x float> %1
}

; Shuffle indices 9, 10 and 15 pick lanes 1, 2 and 7 of %b:
; immediate 0b10000110 = 134.
; CHECK-LABEL: @blend_shufflevector_8xi16
; CHECK: pblendw $134, %xmm1, %xmm0
; CHECK: ret
define <8 x i16> @blend_shufflevector_8xi16(<8 x i16> %a, <8 x i16> %b) {
  %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %1
}