; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s

; Verify that the backend correctly combines AVX2 builtin intrinsics.


; The three arithmetic shifts fold into a single shift by 3 + 3 + 2 = 8.
define <8 x i32> @test_psra_1(<8 x i32> %A) {
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 3)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 2)
  ret <8 x i32> %3
}
; CHECK-LABEL: test_psra_1
; CHECK: vpsrad $8, %ymm0, %ymm0
; CHECK-NEXT: ret

define <16 x i16> @test_psra_2(<16 x i16> %A) {
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 3)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 2)
  ret <16 x i16> %3
}
; CHECK-LABEL: test_psra_2
; CHECK: vpsraw $8, %ymm0, %ymm0
; CHECK-NEXT: ret

; Every shift count is zero (the variable count comes from the low 64 bits of
; the second operand), so no shift instruction should be emitted.
define <16 x i16> @test_psra_3(<16 x i16> %A) {
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}
; CHECK-LABEL: test_psra_3
; CHECK-NOT: vpsraw
; CHECK: ret

define <8 x i32> @test_psra_4(<8 x i32> %A) {
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}
; CHECK-LABEL: test_psra_4
; CHECK-NOT: vpsrad
; CHECK: ret


; Both blend inputs are the same value, so the blend is a no-op regardless of
; the mask and no blend instruction should be emitted.
define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
; CHECK-LABEL: test_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
  ret <16 x i16> %res
}
; CHECK-LABEL: test_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
; CHECK-LABEL: test_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a0, i32 7)
  ret <8 x i32> %res
}
; CHECK-LABEL: test_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret


; A zero mask selects the first operand, so the blend folds away.
define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret


define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
  ret <16 x i16> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret


define <4 x i32> @test2_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 0)
  ret <4 x i32> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret


define <8 x i32> @test2_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 0)
  ret <8 x i32> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret


; An all-ones mask selects the second operand, so the blend folds away.
define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
  ret <32 x i8> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret


define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
  ret <16 x i16> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret


define <4 x i32> @test3_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 -1)
  ret <4 x i32> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret


define <8 x i32> @test3_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 -1)
  ret <8 x i32> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret


declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>)
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32)
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>)
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32)