; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s

; Vector shifts by constant amounts, compiled with SSE2 enabled.
;
; Every function's expectations are anchored on the function's assembly label
; (name followed by a colon), so the instruction patterns that follow are only
; searched for inside that function's own output.

; Splat patterns below: every lane is shifted by the same constant.

; v4i32 left shifts by 2 and by 1. The expectations are one pslld and one padd
; (presumably the shift-by-one is emitted as an add; confirm against llc output).
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shl4:
; CHECK:       pslld
; CHECK:       padd
; CHECK:       ret
  %B = shl <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
  %C = shl <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; v4i32 logical right shifts by 2 and by 1: two back-to-back psrld expected.
define <4 x i32> @shr4(<4 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shr4:
; CHECK:       psrld
; CHECK-NEXT:  psrld
; CHECK:       ret
  %B = lshr <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
  %C = lshr <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; v4i32 arithmetic right shifts by 2 and by 1: two back-to-back psrad expected.
define <4 x i32> @sra4(<4 x i32> %A) nounwind {
entry:
; CHECK-LABEL: sra4:
; CHECK:       psrad
; CHECK-NEXT:  psrad
; CHECK:       ret
  %B = ashr <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
  %C = ashr <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; v2i64 left shifts by 2 and by 9: two back-to-back psllq expected.
define <2 x i64> @shl2(<2 x i64> %A) nounwind {
entry:
; CHECK-LABEL: shl2:
; CHECK:       psllq
; CHECK-NEXT:  psllq
; CHECK:       ret
  %B = shl <2 x i64> %A, <i64 2, i64 2>
  %C = shl <2 x i64> %A, <i64 9, i64 9>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

; v2i64 logical right shifts by 8 and by 1: two back-to-back psrlq expected.
define <2 x i64> @shr2(<2 x i64> %A) nounwind {
entry:
; CHECK-LABEL: shr2:
; CHECK:       psrlq
; CHECK-NEXT:  psrlq
; CHECK:       ret
  %B = lshr <2 x i64> %A, <i64 8, i64 8>
  %C = lshr <2 x i64> %A, <i64 1, i64 1>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

; v8i16 left shifts by 2 and by 1. The expectations are one psllw and one padd
; (presumably the shift-by-one is emitted as an add; confirm against llc output).
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: shl8:
; CHECK:       psllw
; CHECK:       padd
; CHECK:       ret
  %B = shl <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; v8i16 logical right shifts by 2 and by 1: two back-to-back psrlw expected.
define <8 x i16> @shr8(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: shr8:
; CHECK:       psrlw
; CHECK-NEXT:  psrlw
; CHECK:       ret
  %B = lshr <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = lshr <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; v8i16 arithmetic right shifts by 2 and by 1: two back-to-back psraw expected.
define <8 x i16> @sra8(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: sra8:
; CHECK:       psraw
; CHECK-NEXT:  psraw
; CHECK:       ret
  %B = ashr <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = ashr <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; Non-splat tests: the shift amount differs from lane to lane.

; v8i16 left shifts with per-lane amounts. No instruction whose mnemonic
; contains "psll" may appear between the function label and its ret.
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: sll8_nosplat:
; CHECK-NOT:   psll
; CHECK:       ret
  %B = shl <8 x i16> %A, <i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, <i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; v2i64 logical right shifts with per-lane amounts (8,1) and (1,0). The exact
; instruction sequence is pinned, including the movsd blends of the per-lane
; results.
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
entry:
; CHECK-LABEL: shr2_nosplat:
; CHECK:       movdqa %xmm0, %xmm1
; CHECK-NEXT:  psrlq $1, %xmm1
; CHECK-NEXT:  movdqa %xmm0, %xmm2
; CHECK-NEXT:  psrlq $8, %xmm2
; CHECK-NEXT:  movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT:  movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; CHECK-NEXT:  xorpd %xmm0, %xmm1
; CHECK-NEXT:  movapd %xmm1, %xmm0
; CHECK-NEXT:  ret
  %B = lshr <2 x i64> %A, <i64 8, i64 1>
  %C = lshr <2 x i64> %A, <i64 1, i64 0>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

; Other shifts

; v2i32 left shifts by 2 and by 9. A 64-bit-lane psllq is expected
; (presumably because the v2i32 value is widened; confirm against llc output).
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shl2_other:
; CHECK:       psllq
; CHECK:       ret
  %B = shl <2 x i32> %A, <i32 2, i32 2>
  %C = shl <2 x i32> %A, <i32 9, i32 9>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; v2i32 logical right shifts by 8 and by 1. A 64-bit-lane psrlq is expected
; (presumably because the v2i32 value is widened; confirm against llc output).
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shr2_other:
; CHECK:       psrlq
; CHECK:       ret
  %B = lshr <2 x i32> %A, <i32 8, i32 8>
  %C = lshr <2 x i32> %A, <i32 1, i32 1>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; v16i8 left shift by 3: a 16-bit psllw by 3 followed by a pand is expected.
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: shl9:
; CHECK:       psllw $3
; CHECK:       pand
; CHECK:       ret
}

; v16i8 logical right shift by 3: a 16-bit psrlw by 3 followed by a pand is
; expected.
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: shr9:
; CHECK:       psrlw $3
; CHECK:       pand
; CHECK:       ret
}

; v16i8 arithmetic right shift by 7: a pxor followed by a pcmpgtb is expected
; (presumably a signed compare against zero; confirm against llc output).
define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %B
; CHECK-LABEL: sra_v16i8_7:
; CHECK:       pxor
; CHECK:       pcmpgtb
; CHECK:       ret
}

; v16i8 arithmetic right shift by 3: psrlw by 3, pand, pxor and psubb are
; expected, in that order.
define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: sra_v16i8:
; CHECK:       psrlw $3
; CHECK:       pand
; CHECK:       pxor
; CHECK:       psubb
; CHECK:       ret
}