; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

; AVX1 provides no 256-bit integer shift instructions, so each 256-bit vector
; shift below is expected to be split into two 128-bit halves and recombined
; (the vextractf128 / vinsertf128 pattern in the CHECK lines).

;;; Shift left
define <8 x i32> @vshift00(<8 x i32> %a) {
; CHECK-LABEL: vshift00:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift01(<16 x i16> %a) {
; CHECK-LABEL: vshift01:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift02(<4 x i64> %a) {
; CHECK-LABEL: vshift02:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right
define <8 x i32> @vshift03(<8 x i32> %a) {
; CHECK-LABEL: vshift03:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift04(<16 x i16> %a) {
; CHECK-LABEL: vshift04:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift05(<4 x i64> %a) {
; CHECK-LABEL: vshift05:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right
define <8 x i32> @vshift06(<8 x i32> %a) {
; CHECK-LABEL: vshift06:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift07(<16 x i16> %a) {
; CHECK-LABEL: vshift07:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; x86 has no vector byte-shift, so the v32i8 ashr is lowered as a word lshr
; plus mask, then sign-extended via the xor/sub trick with 0x20 (see CHECKs).
define <32 x i8> @vshift09(<32 x i8> %a) {
; CHECK-LABEL: vshift09:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

; ashr by 7 on i8 yields all-ones/all-zeros lanes, so it folds to a
; compare-greater-than against zero.
define <32 x i8> @vshift10(<32 x i8> %a) {
; CHECK-LABEL: vshift10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}

define <32 x i8> @vshift11(<32 x i8> %a) {
; CHECK-LABEL: vshift11:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

define <32 x i8> @vshift12(<32 x i8> %a) {
; CHECK-LABEL: vshift12:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

;;; Support variable shifts
; 1 << %a per lane is lowered by building the float 2^%a via exponent-field
; arithmetic (vpslld $23 + bias 1065353216 == 0x3f800000) then vcvttps2dq.
define <8 x i32> @vshift08(<8 x i32> %a) {
; CHECK-LABEL: vshift08:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}

; PR15141
; shl by distinct per-lane constants should fold to a multiply by powers of 2.
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}

;;; Uses shifts for sign extension
define <16 x i16> @sext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

define <8 x i32> @sext_v8i32(<8 x i32> %a) {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}