; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Regression tests for insertelement / extractelement code generation when
; targeting KNL (AVX-512). Each function below is preceded by the instruction
; patterns that FileCheck expects to find in the generated assembly.

; test1: insert a float loaded from memory into lane 1 and a float argument
; into lane 14 of a <16 x float>.
;CHECK-LABEL: test1:
;CHECK: vinsertps
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
  %rrr = load float* %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; test2: insert a double loaded from memory into lane 1 and a double argument
; into lane 6 of an <8 x double>.
;CHECK-LABEL: test2:
;CHECK: vinsertf32x4
;CHECK: vextractf32x4
;CHECK: vinsertf32x4
;CHECK: ret
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
  %rrr = load double* %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; test3: copy lane 4 of a <16 x float> into lane 1 of the same vector.
;CHECK-LABEL: test3:
;CHECK: vextractf32x4
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test3(<16 x float> %x) nounwind {
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; test4: copy lane 4 of an <8 x i64> into lane 1 of the same vector.
;CHECK-LABEL: test4:
;CHECK: vextracti32x4
;CHECK: vinserti32x4
;CHECK: ret
define <8 x i64> @test4(<8 x i64> %x) nounwind {
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; test5: extract lane 3 of a <4 x float> and return its bits as an i32.
;CHECK-LABEL: test5:
;CHECK: vextractps
;CHECK: ret
define i32 @test5(<4 x float> %x) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; test6: extract lane 3 of a <4 x float> and store it through %out; the
; extract is expected to write directly to memory at (%rdi).
;CHECK-LABEL: test6:
;CHECK: vextractps {{.*}}, (%rdi)
;CHECK: ret
define void @test6(<4 x float> %x, float* %out) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, float* %out, align 4
  ret void
}

; test7: extract from a <16 x float> using a run-time (non-constant) index.
;CHECK-LABEL: test7:
;CHECK: vmovd
;CHECK: vpermps %zmm
;CHECK: ret
define float @test7(<16 x float> %x, i32 %ind) nounwind {
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; test8: extract from an <8 x double> using a run-time index.
;CHECK-LABEL: test8:
;CHECK: vmovq
;CHECK: vpermpd %zmm
;CHECK: ret
define double @test8(<8 x double> %x, i32 %ind) nounwind {
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; test9: extract from an <8 x float> using a run-time index.
;CHECK-LABEL: test9:
;CHECK: vmovd
;CHECK: vpermps %ymm
;CHECK: ret
define float @test9(<8 x float> %x, i32 %ind) nounwind {
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; test10: extract from a <16 x i32> using a run-time index; the result is
; expected to be moved from %xmm0 into %eax.
;CHECK-LABEL: test10:
;CHECK: vmovd
;CHECK: vpermd %zmm
;CHECK: vmovd %xmm0, %eax
;CHECK: ret
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; test11: branch on element 4 of a <16 x i1> unsigned-compare result. Two
; "ret" patterns are expected because the function has two return blocks.
;CHECK-LABEL: test11:
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: kortestw
;CHECK: je
;CHECK: ret
;CHECK: ret
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
A:
  ret <16 x i32>%b
B:
  %c = add <16 x i32>%b, %a
  ret <16 x i32>%c
}

; test12: select between two i64 arguments on element 0 of a <16 x i1>
; signed-compare result over <16 x i64> operands.
;CHECK-LABEL: test12:
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
;CHECK: kortestw
;CHECK: ret
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; test13: insert a scalar compare result into element 0 of a constant
; <16 x i1> and return the mask as an i16. The constant has element 1 false
; and elements 2..15 true, which corresponds to the expected immediate
; 65532 (0xFFFC).
;CHECK-LABEL: test13:
;CHECK: cmpl
;CHECK: sbbl
;CHECK: orl $65532
;CHECK: ret
define i16 @test13(i32 %a, i32 %b) {
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; test14: select between two i64 arguments on element 4 of an <8 x i1>
; signed-compare result.
;CHECK-LABEL: test14:
;CHECK: vpcmpgtq
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: kortestw
;CHECK: ret
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; test15: insert an i1 loaded from memory into element 10 of an undef
; <16 x i1> and return the mask as an i16.
;CHECK-LABEL: test15:
;CHECK: kshiftlw
;CHECK: kmovw
;CHECK: ret
define i16 @test15(i1 *%addr) {
  %x = load i1 * %addr, align 128
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; test16: insert an i1 loaded from memory into element 10 of a <16 x i1>
; obtained by bitcasting the i16 argument, and return the mask as an i16.
;CHECK-LABEL: test16:
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
  %x = load i1 * %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; test17: insert an i1 loaded from memory into an <8 x i1> obtained by
; bitcasting the i8 argument, and return the mask as an i8.
; The insertion index has to be smaller than the vector length (8); an
; out-of-range index does not give a defined result, so element 4 is used.
;CHECK-LABEL: test17:
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
  %x = load i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}