; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s

; Codegen tests for vector shifts by constant amounts on x86-64 (corei7).
; Each function is anchored with a label check on its "name:" line so that the
; instruction checks that follow cannot match inside a different function.

; Splat patterns below: every lane is shifted by the same constant.
; Most tests shift the same input by two different splat amounts and xor the
; results, so that both shifts have to be materialised.

; <4 x i32> shl by 2 and by 1. Expects one padd and one pslld
; (presumably the shift-by-1 is emitted as an add of the value to itself).
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shl4:
; CHECK:      padd
; CHECK:      pslld
; CHECK:      ret
  %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; <4 x i32> lshr by 2 and by 1. Expects two back-to-back psrld.
define <4 x i32> @shr4(<4 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shr4:
; CHECK:      psrld
; CHECK-NEXT: psrld
; CHECK:      ret
  %B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; <4 x i32> ashr by 2 and by 1. Expects two back-to-back psrad.
define <4 x i32> @sra4(<4 x i32> %A) nounwind {
entry:
; CHECK-LABEL: sra4:
; CHECK:      psrad
; CHECK-NEXT: psrad
; CHECK:      ret
  %B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; <2 x i64> shl by 2 and by 9. Expects two back-to-back psllq.
define <2 x i64> @shl2(<2 x i64> %A) nounwind {
entry:
; CHECK-LABEL: shl2:
; CHECK:      psllq
; CHECK-NEXT: psllq
; CHECK:      ret
  %B = shl <2 x i64> %A, < i64 2, i64 2>
  %C = shl <2 x i64> %A, < i64 9, i64 9>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

; <2 x i64> lshr by 8 and by 1. Expects two back-to-back psrlq.
define <2 x i64> @shr2(<2 x i64> %A) nounwind {
entry:
; CHECK-LABEL: shr2:
; CHECK:      psrlq
; CHECK-NEXT: psrlq
; CHECK:      ret
  %B = lshr <2 x i64> %A, < i64 8, i64 8>
  %C = lshr <2 x i64> %A, < i64 1, i64 1>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}


; <8 x i16> shl by 2 and by 1. Expects one padd and one psllw
; (presumably the shift-by-1 is emitted as an add of the value to itself).
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: shl8:
; CHECK:      padd
; CHECK:      psllw
; CHECK:      ret
  %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; <8 x i16> lshr by 2 and by 1. Expects two back-to-back psrlw.
define <8 x i16> @shr8(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: shr8:
; CHECK:      psrlw
; CHECK-NEXT: psrlw
; CHECK:      ret
  %B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; <8 x i16> ashr by 2 and by 1. Expects two back-to-back psraw.
define <8 x i16> @sra8(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: sra8:
; CHECK:      psraw
; CHECK-NEXT: psraw
; CHECK:      ret
  %B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; Non-splat tests: the lanes are shifted by different constants, so the
; immediate-form packed shifts must not appear before the function returns.


; <8 x i16> shl with per-lane amounts. No psll* instruction may be emitted.
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
entry:
; CHECK-LABEL: sll8_nosplat:
; CHECK-NOT:  psll
; CHECK:      ret
  %B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}


; <2 x i64> lshr with per-lane amounts. No psrlq may be emitted.
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
entry:
; CHECK-LABEL: shr2_nosplat:
; CHECK-NOT:  psrlq
; CHECK:      ret
  %B = lshr <2 x i64> %A, < i64 8, i64 1>
  %C = lshr <2 x i64> %A, < i64 1, i64 0>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}


; Other shifts: element types with no same-width packed shift checked here
; (<2 x i32> and <16 x i8>).

; <2 x i32> shl by 2 and by 9. Expects a 64-bit-lane psllq
; (presumably because the two i32 elements are held in 64-bit lanes; confirm
; against the type legalizer).
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shl2_other:
; CHECK:      psllq
; CHECK:      ret
  %B = shl <2 x i32> %A, < i32 2, i32 2>
  %C = shl <2 x i32> %A, < i32 9, i32 9>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; <2 x i32> lshr by 8 and by 1. Expects a 64-bit-lane psrlq
; (same assumption as shl2_other).
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
entry:
; CHECK-LABEL: shr2_other:
; CHECK:      psrlq
; CHECK:      ret
  %B = lshr <2 x i32> %A, < i32 8, i32 8>
  %C = lshr <2 x i32> %A, < i32 1, i32 1>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; <16 x i8> shl by 3. Expects a 16-bit-lane psllw $3 followed by a pand
; (presumably masking off bits that crossed a byte boundary).
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: shl9:
; CHECK: psllw $3
; CHECK: pand
; CHECK: ret
}

; <16 x i8> lshr by 3. Expects a 16-bit-lane psrlw $3 followed by a pand
; (presumably masking off bits that crossed a byte boundary).
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: shr9:
; CHECK: psrlw $3
; CHECK: pand
; CHECK: ret
}

; <16 x i8> ashr by 7 (element width minus one). Expects pxor then pcmpgtb
; (presumably a signed compare against a zeroed register, giving all-ones for
; negative bytes).
define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %B
; CHECK-LABEL: sra_v16i8_7:
; CHECK: pxor
; CHECK: pcmpgtb
; CHECK: ret
}

; <16 x i8> ashr by 3. Expects psrlw $3, pand, pxor, psubb in that order
; (presumably a logical word shift plus mask, then an xor/sub pair that
; restores the sign bits).
define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: sra_v16i8:
; CHECK: psrlw $3
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
; CHECK: ret
}