; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s

; Codegen tests for vector insertelement / extractelement, including vectors
; of i1. The same IR is compiled once per -mcpu value (knl and skx). Checks
; written with the shared prefix apply to both compilations; the per-cpu
; prefixes are used where the expected instructions differ between the two.

; Insert a loaded float at element 1 and a scalar argument at element 14 of a
; <16 x float>.
;CHECK-LABEL: test1:
;CHECK: vinsertps
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
  %rrr = load float, float* %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; Insert a loaded double at element 1 and a scalar argument at element 6 of an
; <8 x double>.
;CHECK-LABEL: test2:
;KNL: vinsertf32x4 $0
;SKX: vinsertf64x2 $0
;CHECK: vextractf32x4 $3
;KNL: vinsertf32x4 $3
;SKX: vinsertf64x2 $3
;CHECK: ret
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
  %rrr = load double, double* %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; Copy element 4 of a <16 x float> into element 1 of the same vector.
;CHECK-LABEL: test3:
;CHECK: vextractf32x4 $1
;CHECK: vinsertf32x4 $0
;CHECK: ret
define <16 x float> @test3(<16 x float> %x) nounwind {
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; Copy element 4 of an <8 x i64> into element 1 of the same vector.
;CHECK-LABEL: test4:
;CHECK: vextracti32x4 $2
;KNL: vinserti32x4 $0
;SKX: vinserti64x2 $0
;CHECK: ret
define <8 x i64> @test4(<8 x i64> %x) nounwind {
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; Extract element 3 of a <4 x float> and return its bits as an i32.
;CHECK-LABEL: test5:
;CHECK: vextractps
;CHECK: ret
define i32 @test5(<4 x float> %x) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; Extract element 3 of a <4 x float> and store it through %out; the check
; expects the extract to write directly to memory.
;CHECK-LABEL: test6:
;CHECK: vextractps {{.*}}, (%rdi)
;CHECK: ret
define void @test6(<4 x float> %x, float* %out) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, float* %out, align 4
  ret void
}

; Extract from a <16 x float> at a run-time index.
;CHECK-LABEL: test7:
;CHECK: vmovd
;CHECK: vpermps %zmm
;CHECK: ret
define float @test7(<16 x float> %x, i32 %ind) nounwind {
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; Extract from an <8 x double> at a run-time index.
;CHECK-LABEL: test8:
;CHECK: vmovq
;CHECK: vpermpd %zmm
;CHECK: ret
define double @test8(<8 x double> %x, i32 %ind) nounwind {
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; Extract from an <8 x float> at a run-time index.
;CHECK-LABEL: test9:
;CHECK: vmovd
;CHECK: vpermps %ymm
;CHECK: ret
define float @test9(<8 x float> %x, i32 %ind) nounwind {
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; Extract from a <16 x i32> at a run-time index and return it as a scalar.
;CHECK-LABEL: test10:
;CHECK: vmovd
;CHECK: vpermd %zmm
;CHECK: vmovd %xmm0, %eax
;CHECK: ret
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; Branch on element 4 of a <16 x i1> produced by an unsigned vector compare.
; Both successor blocks return, hence the two "ret" checks.
;CHECK-LABEL: test11:
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: je
;CHECK: ret
;CHECK: ret
define <16 x i32> @test11(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
A:
  ret <16 x i32> %b
B:
  %c = add <16 x i32> %b, %a
  ret <16 x i32> %c
}

; Select between two scalars on element 0 of a <16 x i1> produced by a signed
; <16 x i64> compare.
;CHECK-LABEL: test12:
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: ret
define i64 @test12(<16 x i64> %a, <16 x i64> %b, i64 %a1, i64 %b1) {
  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a scalar compare result at element 0 of a constant <16 x i1> and
; return the mask as an i16. Element 1 of the constant is false and elements
; 2..15 are true, which is where the 65532 immediate in the check comes from.
;CHECK-LABEL: test13:
;CHECK: cmpl
;CHECK: sbbl
;CHECK: orl $65532
;CHECK: ret
define i16 @test13(i32 %a, i32 %b) {
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Select between two scalars on element 4 of an <8 x i1> produced by a signed
; <8 x i64> compare. The two cpus are expected to use different mask-shift
; widths.
;CHECK-LABEL: test14:
;CHECK: vpcmpgtq
;KNL: kshiftlw $11
;KNL: kshiftrw $15
;KNL: testb
;SKX: kshiftlb $3
;SKX: kshiftrb $7
;SKX: testb
;CHECK: ret
define i64 @test14(<8 x i64> %a, <8 x i64> %b, i64 %a1, i64 %b1) {
  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a loaded i1 at element 10 of an undef <16 x i1> and return the mask
; as an i16.
;CHECK-LABEL: test15:
;CHECK: kshiftlw
;CHECK: kmovw
;CHECK: ret
define i16 @test15(i1* %addr) {
  %x = load i1, i1* %addr, align 128
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1> %x1 to i16
  ret i16 %x2
}

; Insert a loaded i1 at element 10 of a <16 x i1> obtained by bitcasting the
; i16 argument, and return the updated mask as an i16.
;CHECK-LABEL: test16:
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i16 @test16(i1* %addr, i16 %a) {
  %x = load i1, i1* %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1> %x1 to i16
  ret i16 %x2
}

; Insert a loaded i1 at element 4 of an <8 x i1> obtained by bitcasting the
; i8 argument, and return the updated mask as an i8. The two cpus are expected
; to use different mask-instruction widths.
;CHECK-LABEL: test17:
;KNL: kshiftlw
;KNL: kshiftrw
;KNL: korw
;SKX: kshiftlb
;SKX: kshiftrb
;SKX: korb
;CHECK: ret
define i8 @test17(i1* %addr, i8 %a) {
  %x = load i1, i1* %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1> %x1 to i8
  ret i8 %x2
}