; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx2-builtins.c

define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_abs_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpabsb %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg = bitcast <4 x i64> %a0 to <32 x i8>
  %sub = sub <32 x i8> zeroinitializer, %arg
  %cmp = icmp sgt <32 x i8> %arg, zeroinitializer
  %sel = select <32 x i1> %cmp, <32 x i8> %arg, <32 x i8> %sub
  %res = bitcast <32 x i8> %sel to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_abs_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpabsw %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg = bitcast <4 x i64> %a0 to <16 x i16>
  %sub = sub <16 x i16> zeroinitializer, %arg
  %cmp = icmp sgt <16 x i16> %arg, zeroinitializer
  %sel = select <16 x i1> %cmp, <16 x i16> %arg, <16 x i16> %sub
  %res = bitcast <16 x i16> %sel to <4 x i64>
  ret <4 x i64> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_abs_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpabsd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg = bitcast <4 x i64> %a0 to <8 x i32>
  %sub = sub <8 x i32> zeroinitializer, %arg
  %cmp = icmp sgt <8 x i32> %arg, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i32> %arg, <8 x i32> %sub
  %res = bitcast <8 x i32> %sel to <4 x i64>
  ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone

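; NOTE: The abs tests above use the generic sub/icmp/select idiom instead of
; calling @llvm.x86.avx2.pabs.* directly; the declarations are kept although
; nothing in this section calls them, and the vpabs* assertions show the
; backend re-forming the instruction from the idiom.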
define <4 x i64> @test_mm256_add_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_add_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = add <32 x i8> %arg0, %arg1
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_add_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_add_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = add <16 x i16> %arg0, %arg1
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_add_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_add_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = add <8 x i32> %arg0, %arg1
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_add_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_add_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = add <4 x i64> %a0, %a1
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_adds_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_adds_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_adds_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_adds_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_adds_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_adds_epu8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_adds_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_adds_epu16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone

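; NOTE: vpalignr concatenates and byte-shifts each 128-bit lane independently,
; so the 32-element shuffle masks below repeat the same rotation in the low
; lane (elements 0-15 / 32-47) and the high lane (elements 16-31 / 48-63).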
define <4 x i64> @test_mm256_alignr_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_alignr_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test2_mm256_alignr_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test2_mm256_alignr_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_and_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_and_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = and <4 x i64> %a0, %a1
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_andnot_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_andnot_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpand %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %not = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %res = and <4 x i64> %not, %a1
  ret <4 x i64> %res
}

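; NOTE: pavg is modelled as the rounding-average idiom
; trunc((zext(a) + zext(b) + 1) >> 1), which is matched back to a single
; vpavgb/vpavgw.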
define <4 x i64> @test_mm256_avg_epu8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_avg_epu8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %zext0 = zext <32 x i8> %arg0 to <32 x i16>
  %zext1 = zext <32 x i8> %arg1 to <32 x i16>
  %add = add <32 x i16> %zext0, %zext1
  %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = trunc <32 x i16> %lshr to <32 x i8>
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_avg_epu16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_avg_epu16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %zext0 = zext <16 x i16> %arg0 to <16 x i32>
  %zext1 = zext <16 x i16> %arg1 to <16 x i32>
  %add = add <16 x i32> %zext0, %zext1
  %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = trunc <16 x i32> %lshr to <16 x i16>
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_blend_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_blend_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> %arg1, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %res = bitcast <16 x i16> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <2 x i64> @test_mm_blend_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm_blend_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %shuf = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %res = bitcast <4 x i32> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <4 x i64> @test_mm256_blend_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_blend_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %shuf = shufflevector <8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  %res = bitcast <8 x i32> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_blendv_epi8(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_mm256_blendv_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %arg2 = bitcast <4 x i64> %a2 to <32 x i8>
  %call = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %arg0, <32 x i8> %arg1, <32 x i8> %arg2)
  %res = bitcast <32 x i8> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone

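; NOTE: The broadcast tests splat element 0 via a shufflevector with an
; all-zeros mask; as the broadcastd/broadcastq cases show, integer splats can
; lower to the FP vbroadcastss/vbroadcastsd forms since only the bit pattern
; matters.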
define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <16 x i32> zeroinitializer
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <4 x i64> @test_mm256_broadcastb_epi8(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> undef, <32 x i32> zeroinitializer
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastd_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %shuf = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = bitcast <4 x i32> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <4 x i64> @test_mm256_broadcastd_epi32(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastd_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = bitcast <8 x i32> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastq_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %res
}

define <4 x i64> @test_mm256_broadcastq_epi64(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastq_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %res
}

define <2 x double> @test_mm_broadcastsd_pd(<2 x double> %a0) {
; CHECK-LABEL: test_mm_broadcastsd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %res
}

define <4 x double> @test_mm256_broadcastsd_pd(<4 x double> %a0) {
; CHECK-LABEL: test_mm256_broadcastsd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %res
}

define <4 x i64> @test_mm256_broadcastsi128_si256(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastsi128_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_broadcastsi128_si256_mem(<2 x i64>* %p0) {
; X86-LABEL: test_mm256_broadcastsi128_si256_mem:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcastsi128_si256_mem:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %a0 = load <2 x i64>, <2 x i64>* %p0
  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %res
}

define <4 x float> @test_mm_broadcastss_ps(<4 x float> %a0) {
; CHECK-LABEL: test_mm_broadcastss_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %res
}

define <8 x float> @test_mm256_broadcastss_ps(<8 x float> %a0) {
; CHECK-LABEL: test_mm256_broadcastss_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %res
}

define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> zeroinitializer
  %res = bitcast <8 x i16> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <4 x i64> @test_mm256_broadcastw_epi16(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> undef, <16 x i32> zeroinitializer
  %res = bitcast <16 x i16> %shuf to <4 x i64>
  ret <4 x i64> %res
}

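; NOTE: Like vpalignr, the vpslldq/vpsrldq byte shifts below act on each
; 128-bit lane separately; the zero bytes at the lane boundaries come from the
; shuffle's zeroinitializer operand.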
define <4 x i64> @test_mm256_bslli_epi128(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_bslli_epi128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %shuf = shufflevector <32 x i8> zeroinitializer, <32 x i8> %arg0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_bsrli_epi128(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_bsrli_epi128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

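; NOTE: Integer comparisons are written as icmp + sext so the result is the
; all-ones/all-zeros element mask that vpcmpeq*/vpcmpgt* produce.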
define <4 x i64> @test_mm256_cmpeq_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpeq_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %cmp = icmp eq <32 x i8> %arg0, %arg1
  %res = sext <32 x i1> %cmp to <32 x i8>
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_cmpeq_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpeq_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %cmp = icmp eq <16 x i16> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i16>
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_cmpeq_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpeq_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %cmp = icmp eq <8 x i32> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i32>
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_cmpeq_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpeq_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp eq <4 x i64> %a0, %a1
  %res = sext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_cmpgt_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpgt_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %cmp = icmp sgt <32 x i8> %arg0, %arg1
  %res = sext <32 x i1> %cmp to <32 x i8>
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_cmpgt_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpgt_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %cmp = icmp sgt <16 x i16> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i16>
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_cmpgt_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpgt_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %cmp = icmp sgt <8 x i32> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i32>
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_cmpgt_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_cmpgt_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp sgt <4 x i64> %a0, %a1
  %res = sext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %res
}

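; NOTE: Sign extension from the low subvector is matched to vpmovsx*; the
; narrower cases first extract the low elements with a shufflevector before
; the sext.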
define <4 x i64> @test_mm256_cvtepi8_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepi8_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext = sext <16 x i8> %arg0 to <16 x i16>
  %res = bitcast <16 x i16> %ext to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepi8_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %ext = sext <8 x i8> %shuf to <8 x i32>
  %res = bitcast <8 x i32> %ext to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepi8_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ext = sext <4 x i8> %shuf to <4 x i64>
  ret <4 x i64> %ext
}

define <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepi16_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext = sext <8 x i16> %arg0 to <8 x i32>
  %res = bitcast <8 x i32> %ext to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepi16_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ext = sext <4 x i16> %shuf to <4 x i64>
  ret <4 x i64> %ext
}

define <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepi32_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = sext <4 x i32> %arg0 to <4 x i64>
  ret <4 x i64> %ext
}

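; NOTE: The zero-extension tests use the same pattern with zext; the vpmovzx*
; assertions print the implicit interleave with zero in the assembly comments.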
define <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepu8_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext = zext <16 x i8> %arg0 to <16 x i16>
  %res = bitcast <16 x i16> %ext to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepu8_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %ext = zext <8 x i8> %shuf to <8 x i32>
  %res = bitcast <8 x i32> %ext to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepu8_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ext = zext <4 x i8> %shuf to <4 x i64>
  ret <4 x i64> %ext
}

define <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepu16_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext = zext <8 x i16> %arg0 to <8 x i32>
  %res = bitcast <8 x i32> %ext to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepu16_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ext = zext <4 x i16> %shuf to <4 x i64>
  ret <4 x i64> %ext
}

define <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_cvtepu32_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = zext <4 x i32> %arg0 to <4 x i64>
  ret <4 x i64> %ext
}

define <2 x i64> @test_mm256_extracti128_si256(<4 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm256_extracti128_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32> <i32 2, i32 3>
  ret <2 x i64> %res
}

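; NOTE: There is no generic IR idiom for horizontal add/sub, so these tests
; call the target intrinsics directly.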
define <4 x i64> @test_mm256_hadd_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_hadd_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_hadd_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_hadd_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %arg0, <8 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_hadds_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_hadds_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_hsub_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_hsub_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_hsub_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_hsub_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %arg0, <8 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_hsubs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_hsubs_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone

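; NOTE: For the unmasked integer gathers an all-ones mask is materialized with
; vpcmpeqd of a register against itself, and the undef passthrough operand is
; zeroed with vpxor before the gather executes.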
define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i32gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i32gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast i32 *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8* %arg0, <4 x i32> %arg1, <4 x i32> %mask, i8 2)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly

define <2 x i64> @test_mm_mask_i32gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
; X86-LABEL: test_mm_mask_i32gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i32gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast i32 *%a1 to i8*
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %arg0, i8* %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i8 2)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}

define <4 x i64> @test_mm256_i32gather_epi32(i32 *%a0, <4 x i64> %a1) {
; X86-LABEL: test_mm256_i32gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i32gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast i32 *%a0 to i8*
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i32>
  %call = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> undef, i8* %arg0, <8 x i32> %arg1, <8 x i32> %mask, i8 2)
  %bc = bitcast <8 x i32> %call to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i64> @test_mm256_mask_i32gather_epi32(<4 x i64> %a0, i32 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
; X86-LABEL: test_mm256_mask_i32gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i32gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast i32 *%a1 to i8*
  %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
  %arg3 = bitcast <4 x i64> %a3 to <8 x i32>
  %call = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %arg0, i8* %arg1, <8 x i32> %arg2, <8 x i32> %arg3, i8 2)
  %bc = bitcast <8 x i32> %call to <4 x i64>
  ret <4 x i64> %bc
}

define <2 x i64> @test_mm_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i32gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherdq %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i32gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast i64 *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> undef, i8* %arg0, <4 x i32> %arg1, <2 x i64> <i64 -1, i64 -1>, i8 2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly

define <2 x i64> @test_mm_mask_i32gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
; X86-LABEL: test_mm_mask_i32gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i32gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg1 = bitcast i64 *%a1 to i8*
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <2 x i64> %a3, i8 2)
  ret <2 x i64> %res
}

define <4 x i64> @test_mm256_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_i32gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i32gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast i64 *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8* %arg0, <4 x i32> %arg1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly

define <4 x i64> @test_mm256_mask_i32gather_epi64(<4 x i64> %a0, i64 *%a1, <2 x i64> %a2, <4 x i64> %a3) {
; X86-LABEL: test_mm256_mask_i32gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i32gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
; X64-NEXT:    retq
  %arg1 = bitcast i64 *%a1 to i8*
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <4 x i64> %a3, i8 2)
  ret <4 x i64> %res
}

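; NOTE: The floating-point gathers below build their all-ones mask in IR from
; an fcmp oeq of two zero vectors (or @llvm.x86.avx.cmp.*.256 for the 256-bit
; forms), which lowers to vpcmpeqd or vcmpeqpd/vcmpeqps.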
define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i32gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT:    vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i32gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast double *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
  %sext = sext <2 x i1> %cmp to <2 x i64>
  %mask = bitcast <2 x i64> %sext to <2 x double>
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef, i8* %arg0, <4 x i32> %arg1, <2 x double> %mask, i8 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly

define <2 x double> @test_mm_mask_i32gather_pd(<2 x double> %a0, double *%a1, <2 x i64> %a2, <2 x double> %a3) {
; X86-LABEL: test_mm_mask_i32gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i32gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg1 = bitcast double *%a1 to i8*
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %arg1, <4 x i32> %arg2, <2 x double> %a3, i8 2)
  ret <2 x double> %res
}

define <4 x double> @test_mm256_i32gather_pd(double *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_i32gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
; X86-NEXT:    vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i32gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast double *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8* %arg0, <4 x i32> %arg1, <4 x double> %mask, i8 2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_mm256_mask_i32gather_pd(<4 x double> %a0, double *%a1, <2 x i64> %a2, <4 x double> %a3) {
; X86-LABEL: test_mm256_mask_i32gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i32gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0
; X64-NEXT:    retq
  %arg1 = bitcast double *%a1 to i8*
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %arg1, <4 x i32> %arg2, <4 x double> %a3, i8 2)
  ret <4 x double> %res
}

define <4 x float> @test_mm_i32gather_ps(float *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i32gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i32gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast float *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %mask = bitcast <4 x i32> %sext to <4 x float>
  %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8* %arg0, <4 x i32> %arg1, <4 x float> %mask, i8 2)
  ret <4 x float> %call
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_mm_mask_i32gather_ps(<4 x float> %a0, float *%a1, <2 x i64> %a2, <4 x float> %a3) {
; X86-LABEL: test_mm_mask_i32gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i32gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg1 = bitcast float *%a1 to i8*
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %arg1, <4 x i32> %arg2, <4 x float> %a3, i8 2)
  ret <4 x float> %call
}

define <8 x float> @test_mm256_i32gather_ps(float *%a0, <4 x i64> %a1) {
; X86-LABEL: test_mm256_i32gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT:    vcmpeqps %ymm1, %ymm1, %ymm2
; X86-NEXT:    vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i32gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vcmpeqps %ymm1, %ymm1, %ymm2
; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT:    vmovaps %ymm1, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast float *%a0 to i8*
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %mask = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i8 0)
  %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arg0, <8 x i32> %arg1, <8 x float> %mask, i8 2)
  ret <8 x float> %call
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_mm256_mask_i32gather_ps(<8 x float> %a0, float *%a1, <4 x i64> %a2, <8 x float> %a3) {
; X86-LABEL: test_mm256_mask_i32gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i32gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0
; X64-NEXT:    retq
  %arg1 = bitcast float *%a1 to i8*
  %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
  %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %arg1, <8 x i32> %arg2, <8 x float> %a3, i8 2)
  ret <8 x float> %call
}

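; NOTE: The 64-bit-indexed gathers below return at most 128 bits of i32/float
; data, so the variants with ymm indices produce an xmm result and end with
; vzeroupper.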
define <2 x i64> @test_mm_i64gather_epi32(i32 *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i64gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i64gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast i32 *%a0 to i8*
  %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> undef, i8* %arg0, <2 x i64> %a1, <4 x i32> %mask, i8 2)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly

define <2 x i64> @test_mm_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
; X86-LABEL: test_mm_mask_i64gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i64gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast i32 *%a1 to i8*
  %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %arg0, i8* %arg1, <2 x i64> %a2, <4 x i32> %arg3, i8 2)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm256_i64gather_epi32(i32 *%a0, <4 x i64> %a1) {
; X86-LABEL: test_mm256_i64gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%ymm0,2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i64gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm0,2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast i32 *%a0 to i8*
  %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8* %arg0, <4 x i64> %a1, <4 x i32> %mask, i8 2)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly

define <2 x i64> @test_mm256_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <4 x i64> %a2, <2 x i64> %a3) {
; X86-LABEL: test_mm256_mask_i64gather_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i64gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast i32 *%a1 to i8*
  %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %arg0, i8* %arg1, <4 x i64> %a2, <4 x i32> %arg3, i8 2)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_i64gather_epi64(i64 *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i64gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherqq %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i64gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast i64 *%a0 to i8*
  %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> undef, i8* %arg0, <2 x i64> %a1, <2 x i64> <i64 -1, i64 -1>, i8 2)
  ret <2 x i64> %call
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly

define <2 x i64> @test_mm_mask_i64gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
; X86-LABEL: test_mm_mask_i64gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i64gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg1 = bitcast i64 *%a1 to i8*
  %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %arg1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
  ret <2 x i64> %call
}

define <4 x i64> @test_mm256_i64gather_epi64(i64 *%a0, <4 x i64> %a1) {
; X86-LABEL: test_mm256_i64gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i64gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast i64 *%a0 to i8*
  %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8* %arg0, <4 x i64> %a1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
  ret <4 x i64> %call
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i64> @test_mm256_mask_i64gather_epi64(<4 x i64> %a0, i64 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
; X86-LABEL: test_mm256_mask_i64gather_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i64gather_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
; X64-NEXT:    retq
  %arg1 = bitcast i64 *%a1 to i8*
  %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %arg1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
  ret <4 x i64> %call
}

define <2 x double> @test_mm_i64gather_pd(double *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i64gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT:    vgatherqpd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i64gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast double *%a0 to i8*
  %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
  %sext = sext <2 x i1> %cmp to <2 x i64>
  %mask = bitcast <2 x i64> %sext to <2 x double>
  %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> undef, i8* %arg0, <2 x i64> %a1, <2 x double> %mask, i8 2)
  ret <2 x double> %call
}
declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly

define <2 x double> @test_mm_mask_i64gather_pd(<2 x double> %a0, double *%a1, <2 x i64> %a2, <2 x double> %a3) {
; X86-LABEL: test_mm_mask_i64gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i64gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg1 = bitcast double *%a1 to i8*
  %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %arg1, <2 x i64> %a2, <2 x double> %a3, i8 2)
  ret <2 x double> %call
}

define <4 x double> @test_mm256_i64gather_pd(double *%a0, <4 x i64> %a1) {
; X86-LABEL: test_mm256_i64gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
; X86-NEXT:    vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i64gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast double *%a0 to i8*
  %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
  %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %arg0, <4 x i64> %a1, <4 x double> %mask, i8 2)
  ret <4 x double> %call
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_mm256_mask_i64gather_pd(<4 x double> %a0, i64 *%a1, <4 x i64> %a2, <4 x double> %a3) {
; X86-LABEL: test_mm256_mask_i64gather_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i64gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0
; X64-NEXT:    retq
  %arg1 = bitcast i64 *%a1 to i8*
  %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %arg1, <4 x i64> %a2, <4 x double> %a3, i8 2)
  ret <4 x double> %call
}

define <4 x float> @test_mm_i64gather_ps(float *%a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_i64gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT:    vgatherqps %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_i64gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast float *%a0 to i8*
  %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %mask = bitcast <4 x i32> %sext to <4 x float>
  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> undef, i8* %arg0, <2 x i64> %a1, <4 x float> %mask, i8 2)
  ret <4 x float> %call
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_mm_mask_i64gather_ps(<4 x float> %a0, float *%a1, <2 x i64> %a2, <4 x float> %a3) {
; X86-LABEL: test_mm_mask_i64gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_i64gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
; X64-NEXT:    retq
  %arg1 = bitcast float *%a1 to i8*
  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %arg1, <2 x i64> %a2, <4 x float> %a3, i8 2)
  ret <4 x float> %call
}

define <4 x float> @test_mm256_i64gather_ps(float *%a0, <4 x i64> %a1) {
; X86-LABEL: test_mm256_i64gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT:    vgatherqps %xmm2, (%eax,%ymm0,2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_i64gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm0,2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast float *%a0 to i8*
  %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %mask = bitcast <4 x i32> %sext to <4 x float>
  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8* %arg0, <4 x i64> %a1, <4 x float> %mask, i8 2)
  ret <4 x float> %call
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_mm256_mask_i64gather_ps(<4 x float> %a0, float *%a1, <4 x i64> %a2, <4 x float> %a3) {
; X86-LABEL: test_mm256_mask_i64gather_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_i64gather_ps:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg1 = bitcast float *%a1 to i8*
  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %arg1, <4 x i64> %a2, <4 x float> %a3, i8 2)
  ret <4 x float> %call
}

define <4 x i64> @test0_mm256_inserti128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
; CHECK-LABEL: test0_mm256_inserti128_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i64> %res
}

define <4 x i64> @test1_mm256_inserti128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
; CHECK-LABEL: test1_mm256_inserti128_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_madd_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_madd_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone

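; NOTE: vpmaddubsw multiplies unsigned bytes from the first source by signed
; bytes from the second and sums adjacent pairs into i16 lanes with signed
; saturation.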
define <4 x i64> @test_mm256_maddubs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_maddubs_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone

define <2 x i64> @test_mm_maskload_epi32(i32* %a0, <2 x i64> %a1) nounwind {
; X86-LABEL: test_mm_maskload_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovd (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskload_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast i32* %a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly

define <4 x i64> @test_mm256_maskload_epi32(i32* %a0, <4 x i64> %a1) nounwind {
; X86-LABEL: test_mm256_maskload_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovd (%eax), %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskload_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast i32* %a0 to i8*
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %call = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %arg0, <8 x i32> %arg1)
  %bc = bitcast <8 x i32> %call to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly

define <2 x i64> @test_mm_maskload_epi64(i64* %a0, <2 x i64> %a1) nounwind {
; X86-LABEL: test_mm_maskload_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovq (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskload_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to i8*
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %arg0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly

define <4 x i64> @test_mm256_maskload_epi64(i64* %a0, <4 x i64> %a1) nounwind {
; X86-LABEL: test_mm256_maskload_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovq (%eax), %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskload_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to i8*
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %arg0, <4 x i64> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly

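; NOTE: The maskstore tests below only write the lanes whose mask element has
; its sign bit set; memory for the remaining lanes is left untouched.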
define void @test_mm_maskstore_epi32(float* %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X86-LABEL: test_mm_maskstore_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovd %xmm1, %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskstore_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; X64-NEXT:    retq
  %arg0 = bitcast float* %a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  call void @llvm.x86.avx2.maskstore.d(i8* %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind readnone

define void @test_mm256_maskstore_epi32(float* %a0, <4 x i64> %a1, <4 x i64> %a2) nounwind {
; X86-LABEL: test_mm256_maskstore_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovd %ymm1, %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskstore_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast float* %a0 to i8*
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
  call void @llvm.x86.avx2.maskstore.d.256(i8* %arg0, <8 x i32> %arg1, <8 x i32> %arg2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind readnone

define void @test_mm_maskstore_epi64(i64* %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X86-LABEL: test_mm_maskstore_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovq %xmm1, %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskstore_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi)
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to i8*
  call void @llvm.x86.avx2.maskstore.q(i8* %arg0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind readnone

define void @test_mm256_maskstore_epi64(i64* %a0, <4 x i64> %a1, <4 x i64> %a2) nounwind {
; X86-LABEL: test_mm256_maskstore_epi64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmaskmovq %ymm1, %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskstore_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq %ymm1, %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to i8*
  call void @llvm.x86.avx2.maskstore.q.256(i8* %arg0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind readnone

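; NOTE: The min/max tests express each operation as an icmp+select idiom that
; the backend is expected to match to a single vpmax*/vpmin* instruction.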
define <4 x i64> @test_mm256_max_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_max_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %cmp = icmp sgt <32 x i8> %arg0, %arg1
  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
  %bc = bitcast <32 x i8> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_max_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %cmp = icmp sgt <16 x i16> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
  %bc = bitcast <16 x i16> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_max_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %cmp = icmp sgt <8 x i32> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
  %bc = bitcast <8 x i32> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_max_epu8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %cmp = icmp ugt <32 x i8> %arg0, %arg1
  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
  %bc = bitcast <32 x i8> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_max_epu16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %cmp = icmp ugt <16 x i16> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
  %bc = bitcast <16 x i16> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_max_epu32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %cmp = icmp ugt <8 x i32> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
  %bc = bitcast <8 x i32> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_min_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %cmp = icmp slt <32 x i8> %arg0, %arg1
  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
  %bc = bitcast <32 x i8> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_min_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %cmp = icmp slt <16 x i16> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
  %bc = bitcast <16 x i16> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_min_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %cmp = icmp slt <8 x i32> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
  %bc = bitcast <8 x i32> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_min_epu8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %cmp = icmp ult <32 x i8> %arg0, %arg1
  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
  %bc = bitcast <32 x i8> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_min_epu16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %cmp = icmp ult <16 x i16> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
  %bc = bitcast <16 x i16> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_min_epu32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %cmp = icmp ult <8 x i32> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
  %bc = bitcast <8 x i32> %sel to <4 x i64>
  ret <4 x i64> %bc
}

define i32 @test_mm256_movemask_epi8(<4 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm256_movemask_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovmskb %ymm0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %arg0)
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_mpsadbw_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mpsadbw_epu8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmpsadbw $3, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %call = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %arg0, <32 x i8> %arg1, i8 3)
  %bc = bitcast <16 x i16> %call to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone

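; NOTE: The next two tests spell _mm256_mul_epi32/_mm256_mul_epu32 in generic
; IR: the shl+ashr pair sign-extends the low 32 bits of each i64 lane and the
; and-mask zero-extends them, which lets the backend select vpmuldq and
; vpmuludq respectively.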
define <4 x i64> @test_mm256_mul_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mul_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $32, %ymm0, %ymm0
; CHECK-NEXT:    vpsrad $31, %ymm0, %ymm2
; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
; CHECK-NEXT:    vpsllq $32, %ymm1, %ymm1
; CHECK-NEXT:    vpsrad $31, %ymm1, %ymm2
; CHECK-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %A = shl <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
  %A1 = ashr exact <4 x i64> %A, <i64 32, i64 32, i64 32, i64 32>
  %B = shl <4 x i64> %a1, <i64 32, i64 32, i64 32, i64 32>
  %B1 = ashr exact <4 x i64> %B, <i64 32, i64 32, i64 32, i64 32>
  %res = mul nsw <4 x i64> %A1, %B1
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_mul_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mul_epu32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %A = and <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %B = and <4 x i64> %a1, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %res = mul nuw <4 x i64> %A, %B
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_mulhi_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mulhi_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_mulhi_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mulhi_epu16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_mulhrs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mulhrs_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_mullo_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mullo_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = mul <16 x i16> %arg0, %arg1
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_mullo_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mullo_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = mul <8 x i32> %arg0, %arg1
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_or_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_or_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = or <4 x i64> %a0, %a1
  ret <4 x i64> %res
}

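; NOTE: The pack instructions narrow with saturation and operate per 128-bit
; lane, so the two sources are interleaved lane by lane rather than
; concatenated across the full 256-bit register.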
define <4 x i64> @test_mm256_packs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_packs_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %call = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %arg0, <16 x i16> %arg1)
  %res = bitcast <32 x i8> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_packs_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_packs_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %call = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %arg0, <8 x i32> %arg1)
  %res = bitcast <16 x i16> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_packus_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_packus_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %call = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %arg0, <16 x i16> %arg1)
  %res = bitcast <32 x i8> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_packus_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_packus_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %call = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %arg0, <8 x i32> %arg1)
  %res = bitcast <16 x i16> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_permute2x128_si256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_permute2x128_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i64> @test_mm256_permute4x64_epi64(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_permute4x64_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,2,0]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
  ret <4 x i64> %res
}

define <4 x double> @test_mm256_permute4x64_pd(<4 x double> %a0) {
; CHECK-LABEL: test_mm256_permute4x64_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,1,0]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
  ret <4 x double> %res
}

define <4 x i64> @test_mm256_permutevar8x32_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_permutevar8x32_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %call = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %arg0, <8 x i32> %arg1)
  %res = bitcast <8 x i32> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly

define <8 x float> @test_mm256_permutevar8x32_ps(<8 x float> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_permutevar8x32_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %arg1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly

define <4 x i64> @test_mm256_sad_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_sad_epu8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %arg0, <32 x i8> %arg1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_shuffle_epi32(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_shuffle_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
  %res = bitcast <8 x i32> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_shuffle_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_shuffle_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %shuf = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone

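; NOTE: vpshufhw/vpshuflw reorder only the high/low four words within each
; 128-bit lane; the other half of each lane passes through unchanged, as the
; shuffle masks below show.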
define <4 x i64> @test_mm256_shufflehi_epi16(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_shufflehi_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,6,5,8,9,10,11,15,14,14,13]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
  %res = bitcast <16 x i16> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_shufflelo_epi16(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_shufflelo_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,1,4,5,6,7,11,8,9,9,12,13,14,15]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
  %res = bitcast <16 x i16> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_sign_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_sign_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %call = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %res = bitcast <32 x i8> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_sign_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_sign_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %call = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %res = bitcast <16 x i16> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_sign_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_sign_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %call = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %arg0, <8 x i32> %arg1)
  %res = bitcast <8 x i32> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone

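; NOTE: AVX2 shifts come in three flavours: vpsll/vpsrl/vpsra take a single
; count from the low bits of an XMM register, the immediate (slli/srli/srai)
; forms encode the count in the instruction, and the variable
; (sllv/srlv/srav) forms shift each element by its own count.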
define <4 x i64> @test_mm256_sll_epi16(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_sll_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone

define <4 x i64> @test_mm256_sll_epi32(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_sll_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_mm256_sll_epi64(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_sll_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @test_mm256_slli_epi16(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_slli_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %arg0, i32 3)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone

define <4 x i64> @test_mm256_slli_epi32(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_slli_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %arg0, i32 3)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone

define <4 x i64> @test_mm256_slli_epi64(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_slli_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 3)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone

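; NOTE: _mm256_slli_si256 is a byte shift performed independently within each
; 128-bit lane (vpslldq); in IR it is a shufflevector that pulls the
; shifted-in bytes from a zero vector.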
define <4 x i64> @test_mm256_slli_si256(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_slli_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %shuf = shufflevector <32 x i8> zeroinitializer, <32 x i8> %arg0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <2 x i64> @test_mm_sllv_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm_sllv_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_mm256_sllv_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_sllv_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %arg0, <8 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x i64> @test_mm_sllv_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm_sllv_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @test_mm256_sllv_epi64(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_sllv_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone

define <4 x i64> @test_mm256_sra_epi16(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_sra_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone

define <4 x i64> @test_mm256_sra_epi32(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_sra_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_mm256_srai_epi16(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_srai_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %arg0, i32 3)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone

define <4 x i64> @test_mm256_srai_epi32(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_srai_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %arg0, i32 3)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone

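; NOTE: AVX2 provides per-element arithmetic shifts only for 32-bit lanes
; (vpsravd); 16- and 64-bit variable arithmetic shifts do not appear until
; AVX-512.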
define <2 x i64> @test_mm_srav_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm_srav_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_mm256_srav_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_srav_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %arg0, <8 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_mm256_srl_epi16(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_srl_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone

define <4 x i64> @test_mm256_srl_epi32(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_srl_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_mm256_srl_epi64(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm256_srl_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @test_mm256_srli_epi16(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_srli_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %arg0, i32 3)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone

define <4 x i64> @test_mm256_srli_epi32(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_srli_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %arg0, i32 3)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone

define <4 x i64> @test_mm256_srli_epi64(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_srli_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 3)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone

define <4 x i64> @test_mm256_srli_si256(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_srli_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

define <2 x i64> @test_mm_srlv_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm_srlv_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_mm256_srlv_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_srlv_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %arg0, <8 x i32> %arg1)
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x i64> @test_mm_srlv_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_mm_srlv_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @test_mm256_srlv_epi64(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_srlv_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone

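; NOTE: vmovntdqa performs a non-temporal (streaming) load that hints the
; data should bypass the cache hierarchy; the source address must be 32-byte
; aligned.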
define <4 x i64> @test_mm256_stream_load_si256(<4 x i64> *%a0) {
; X86-LABEL: test_mm256_stream_load_si256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovntdqa (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_stream_load_si256:
; X64:       # %bb.0:
; X64-NEXT:    vmovntdqa (%rdi), %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> *%a0 to i8*
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %arg0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly

define <4 x i64> @test_mm256_sub_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_sub_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = sub <32 x i8> %arg0, %arg1
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_sub_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_sub_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = sub <16 x i16> %arg0, %arg1
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_sub_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_sub_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = sub <8 x i32> %arg0, %arg1
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_sub_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_sub_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = sub <4 x i64> %a0, %a1
  ret <4 x i64> %res
}

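; NOTE: The subs tests use saturating subtraction: vpsubsb/vpsubsw clamp
; signed results to the i8/i16 range, while vpsubusb/vpsubusw clamp unsigned
; results at zero.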
define <4 x i64> @test_mm256_subs_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_subs_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_subs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_subs_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_subs_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_subs_epu8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_subs_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_subs_epu16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone

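; NOTE: The 256-bit unpack instructions interleave within each 128-bit lane
; rather than across the whole register, which is why the 'hi' variants pick
; elements 8-15 and 24-31 instead of the top half 16-31.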
define <4 x i64> @test_mm256_unpackhi_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpackhi_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_unpackhi_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpackhi_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = shufflevector <16 x i16> %arg0, <16 x i16> %arg1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_unpackhi_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpackhi_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = shufflevector <8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_unpackhi_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpackhi_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_unpacklo_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpacklo_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_unpacklo_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpacklo_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = shufflevector <16 x i16> %arg0, <16 x i16> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_unpacklo_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpacklo_epi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = shufflevector <8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_unpacklo_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_unpacklo_epi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_xor_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: test_mm256_xor_si256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = xor <4 x i64> %a0, %a1
  ret <4 x i64> %res
}

declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone