; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2


; Verify that the following shifts are lowered into a sequence of two shifts plus
; a blend. On pre-AVX2 targets, instead of scalarizing a logical or arithmetic
; packed shift right by a constant build_vector, the backend should always try to
; emit the simpler sequence of two shifts plus a blend when possible.

define <8 x i16> @test1(<8 x i16> %a) {
  %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test1
; SSE: psrlw
; SSE-NEXT: psrlw
; SSE-NEXT: movss
; AVX: vpsrlw
; AVX-NEXT: vpsrlw
; AVX-NEXT: vmovss
; AVX2: vpsrlw
; AVX2-NEXT: vpsrlw
; AVX2-NEXT: vmovss
; CHECK: ret


define <8 x i16> @test2(<8 x i16> %a) {
  %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test2
; SSE: psrlw
; SSE-NEXT: psrlw
; SSE-NEXT: movsd
; AVX: vpsrlw
; AVX-NEXT: vpsrlw
; AVX-NEXT: vmovsd
; AVX2: vpsrlw
; AVX2-NEXT: vpsrlw
; AVX2-NEXT: vmovsd
; CHECK: ret


define <4 x i32> @test3(<4 x i32> %a) {
  %lshr = lshr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test3
; SSE: psrld
; SSE-NEXT: psrld
; SSE-NEXT: movss
; AVX: vpsrld
; AVX-NEXT: vpsrld
; AVX-NEXT: vmovss
; AVX2: vpsrlvd
; CHECK: ret


define <4 x i32> @test4(<4 x i32> %a) {
  %lshr = lshr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test4
; SSE: psrld
; SSE-NEXT: psrld
; SSE-NEXT: movsd
; AVX: vpsrld
; AVX-NEXT: vpsrld
; AVX-NEXT: vmovsd
; AVX2: vpsrlvd
; CHECK: ret


define <8 x i16> @test5(<8 x i16> %a) {
  %ashr = ashr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %ashr
}
; CHECK-LABEL: test5
; SSE: psraw
; SSE-NEXT: psraw
; SSE-NEXT: movss
; AVX: vpsraw
; AVX-NEXT: vpsraw
; AVX-NEXT: vmovss
; AVX2: vpsraw
; AVX2-NEXT: vpsraw
; AVX2-NEXT: vmovss
; CHECK: ret


define <8 x i16> @test6(<8 x i16> %a) {
  %ashr = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %ashr
}
; CHECK-LABEL: test6
; SSE: psraw
; SSE-NEXT: psraw
; SSE-NEXT: movsd
; AVX: vpsraw
; AVX-NEXT: vpsraw
; AVX-NEXT: vmovsd
; AVX2: vpsraw
; AVX2-NEXT: vpsraw
; AVX2-NEXT: vmovsd
; CHECK: ret


define <4 x i32> @test7(<4 x i32> %a) {
  %ashr = ashr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
  ret <4 x i32> %ashr
}
; CHECK-LABEL: test7
; SSE: psrad
; SSE-NEXT: psrad
; SSE-NEXT: movss
; AVX: vpsrad
; AVX-NEXT: vpsrad
; AVX-NEXT: vmovss
; AVX2: vpsravd
; CHECK: ret


define <4 x i32> @test8(<4 x i32> %a) {
  %ashr = ashr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
  ret <4 x i32> %ashr
}
; CHECK-LABEL: test8
; SSE: psrad
; SSE-NEXT: psrad
; SSE-NEXT: movsd
; AVX: vpsrad
; AVX-NEXT: vpsrad
; AVX-NEXT: vmovsd
; AVX2: vpsravd
; CHECK: ret
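

; Note on the expected lowering (explanatory comment only, not part of the
; checked output). For the <8 x i16> cases, AVX2 provides no per-element
; variable shift, so all three targets are expected to use the two-shift-plus-
; blend sequence; only the <4 x i32> cases can use vpsrlvd/vpsravd on AVX2.
; A rough sketch of the sequence checked for test1 on SSE is shown below.
; Register choices and copy instructions are illustrative and depend on the
; register allocator; FileCheck above only verifies the three mnemonics.
;   movdqa %xmm0, %xmm1
;   psrlw  $3, %xmm1      ; every lane shifted right by 3
;   psrlw  $2, %xmm0      ; every lane shifted right by 2
;   movss  %xmm1, %xmm0   ; blend the low 32 bits (lanes 0-1) from the by-3 result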