1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 6 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F 7 ; 8 ; Combine tests involving SSE3/SSSE3 target shuffles (MOVDDUP, MOVSHDUP, MOVSLDUP, PSHUFB) 9 10 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) 11 12 define <16 x i8> @combine_vpshufb_as_zero(<16 x i8> %a0) { 13 ; SSE-LABEL: combine_vpshufb_as_zero: 14 ; SSE: # %bb.0: 15 ; SSE-NEXT: xorps %xmm0, %xmm0 16 ; SSE-NEXT: retq 17 ; 18 ; AVX-LABEL: combine_vpshufb_as_zero: 19 ; AVX: # %bb.0: 20 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 21 ; AVX-NEXT: retq 22 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) 23 %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) 24 %res2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res1, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) 25 ret <16 x i8> %res2 26 } 27 28 define <16 x i8> @combine_vpshufb_as_movq(<16 x i8> %a0) { 29 ; SSE-LABEL: combine_vpshufb_as_movq: 30 ; SSE: # %bb.0: 31 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 32 ; SSE-NEXT: retq 33 ; 34 ; AVX-LABEL: combine_vpshufb_as_movq: 35 ; AVX: # %bb.0: 36 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 37 ; AVX-NEXT: retq 38 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 128, i8 1, i8 128, i8 2, i8 128, i8 3, i8 128, i8 4, i8 128, i8 5, i8 128, i8 6, i8 128, i8 7, i8 128>) 39 %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 1, i8 3, i8 5, i8 7, i8 9, i8 11, i8 13, i8 15>) 40 ret <16 x i8> %res1 41 } 42 43 define <2 x double> @combine_pshufb_as_movsd(<2 x double> %a0, <2 x double> %a1) { 44 ; SSSE3-LABEL: combine_pshufb_as_movsd: 45 ; SSSE3: # %bb.0: 46 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 47 ; SSSE3-NEXT: movapd %xmm1, %xmm0 48 ; SSSE3-NEXT: retq 49 ; 50 ; SSE41-LABEL: combine_pshufb_as_movsd: 51 ; SSE41: # %bb.0: 52 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 53 ; SSE41-NEXT: retq 54 ; 55 ; AVX-LABEL: combine_pshufb_as_movsd: 56 ; AVX: # %bb.0: 57 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 58 ; AVX-NEXT: retq 59 %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 3, i32 0> 60 %2 = bitcast <2 x double> %1 to <16 x i8> 61 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>) 62 %4 = bitcast <16 x i8> %3 to <2 x double> 63 ret <2 x double> %4 64 } 65 66 define <4 x float> @combine_pshufb_as_movss(<4 x float> %a0, <4 x float> %a1) { 67 ; SSSE3-LABEL: combine_pshufb_as_movss: 68 ; SSSE3: # %bb.0: 69 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 70 ; SSSE3-NEXT: retq 71 ; 72 ; SSE41-LABEL: combine_pshufb_as_movss: 73 ; SSE41: # %bb.0: 74 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 75 ; SSE41-NEXT: retq 76 ; 77 ; AVX-LABEL: combine_pshufb_as_movss: 78 ; AVX: # %bb.0: 79 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 80 ; AVX-NEXT: retq 81 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 3, i32 2, i32 1> 82 %2 = bitcast <4 x float> %1 to <16 x i8> 83 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 12, i8 13, i8 14, i8 15, i8 8, i8 9, i8 10, i8 11, i8 4, i8 5, i8 6, i8 7>) 84 %4 = bitcast <16 x i8> %3 to <4 x float> 85 ret <4 x float> %4 86 } 87 88 define <4 x i32> @combine_pshufb_as_zext(<16 x i8> %a0) { 89 ; SSSE3-LABEL: combine_pshufb_as_zext: 90 ; SSSE3: # %bb.0: 91 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 92 ; SSSE3-NEXT: retq 93 ; 94 ; SSE41-LABEL: combine_pshufb_as_zext: 95 ; SSE41: # %bb.0: 96 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 97 ; SSE41-NEXT: retq 98 ; 99 ; AVX-LABEL: combine_pshufb_as_zext: 100 ; AVX: # %bb.0: 101 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 102 ; AVX-NEXT: retq 103 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 2, i8 -1, i8 -1, i8 -1, i8 3, i8 -1, i8 -1, i8 -1>) 104 %2 = bitcast <16 x i8> %1 to <4 x i32> 105 ret <4 x i32> %2 106 } 107 108 define <2 x double> @combine_pshufb_as_vzmovl_64(<2 x double> %a0) { 109 ; SSE-LABEL: combine_pshufb_as_vzmovl_64: 110 ; SSE: # %bb.0: 111 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 112 ; SSE-NEXT: retq 113 ; 114 ; AVX-LABEL: combine_pshufb_as_vzmovl_64: 115 ; AVX: # %bb.0: 116 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 117 ; AVX-NEXT: retq 118 %1 = bitcast <2 x double> %a0 to <16 x i8> 119 %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 120 %3 = bitcast <16 x i8> %2 to <2 x double> 121 ret <2 x double> %3 122 } 123 124 define <4 x float> @combine_pshufb_as_vzmovl_32(<4 x float> %a0) { 125 ; SSSE3-LABEL: combine_pshufb_as_vzmovl_32: 126 ; SSSE3: # %bb.0: 127 ; SSSE3-NEXT: xorps %xmm1, %xmm1 128 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 129 ; SSSE3-NEXT: movaps %xmm1, %xmm0 130 ; SSSE3-NEXT: retq 131 ; 132 ; SSE41-LABEL: combine_pshufb_as_vzmovl_32: 133 ; SSE41: # %bb.0: 134 ; SSE41-NEXT: xorps %xmm1, %xmm1 135 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 136 ; SSE41-NEXT: retq 137 ; 138 ; AVX-LABEL: combine_pshufb_as_vzmovl_32: 139 ; AVX: # %bb.0: 140 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 141 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 142 ; AVX-NEXT: retq 143 %1 = bitcast <4 x float> %a0 to <16 x i8> 144 %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 145 %3 = bitcast <16 x i8> %2 to <4 x float> 146 ret <4 x float> %3 147 } 148 149 define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) { 150 ; SSE-LABEL: combine_pshufb_movddup: 151 ; SSE: # %bb.0: 152 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7] 153 ; SSE-NEXT: retq 154 ; 155 ; AVX-LABEL: combine_pshufb_movddup: 156 ; AVX: # %bb.0: 157 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7] 158 ; AVX-NEXT: retq 159 %1 = bitcast <4 x float> %a0 to <16 x i8> 160 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>) 161 %3 = bitcast <16 x i8> %2 to <4 x float> 162 %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 163 ret <4 x float> %4 164 } 165 166 define <4 x float> @combine_pshufb_movshdup(<4 x float> %a0) { 167 ; SSE-LABEL: combine_pshufb_movshdup: 168 ; SSE: # %bb.0: 169 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3] 170 ; SSE-NEXT: retq 171 ; 172 ; AVX-LABEL: combine_pshufb_movshdup: 173 ; AVX: # %bb.0: 174 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3] 175 ; AVX-NEXT: retq 176 %1 = bitcast <4 x float> %a0 to <16 x i8> 177 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>) 178 %3 = bitcast <16 x i8> %2 to <4 x float> 179 %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 180 ret <4 x float> %4 181 } 182 183 define <4 x float> @combine_pshufb_movsldup(<4 x float> %a0) { 184 ; SSE-LABEL: combine_pshufb_movsldup: 185 ; SSE: # %bb.0: 186 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1] 187 ; SSE-NEXT: retq 188 ; 189 ; AVX-LABEL: combine_pshufb_movsldup: 190 ; AVX: # %bb.0: 191 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1] 192 ; AVX-NEXT: retq 193 %1 = bitcast <4 x float> %a0 to <16 x i8> 194 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>) 195 %3 = bitcast <16 x i8> %2 to <4 x float> 196 %4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 197 ret <4 x float> %4 198 } 199 200 define <16 x i8> @combine_pshufb_palignr(<16 x i8> %a0, <16 x i8> %a1) { 201 ; SSE-LABEL: combine_pshufb_palignr: 202 ; SSE: # %bb.0: 203 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 204 ; SSE-NEXT: retq 205 ; 206 ; AVX-LABEL: combine_pshufb_palignr: 207 ; AVX: # %bb.0: 208 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3] 209 ; AVX-NEXT: retq 210 %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 211 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>) 212 ret <16 x i8> %2 213 } 214 215 define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) { 216 ; SSE-LABEL: combine_pshufb_pslldq: 217 ; SSE: # %bb.0: 218 ; SSE-NEXT: xorps %xmm0, %xmm0 219 ; SSE-NEXT: retq 220 ; 221 ; AVX-LABEL: combine_pshufb_pslldq: 222 ; AVX: # %bb.0: 223 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 224 ; AVX-NEXT: retq 225 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>) 226 %2 = shufflevector <16 x i8> %1, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 227 ret <16 x i8> %2 228 } 229 230 define <16 x i8> @combine_pshufb_psrldq(<16 x i8> %a0) { 231 ; SSE-LABEL: combine_pshufb_psrldq: 232 ; SSE: # %bb.0: 233 ; SSE-NEXT: xorps %xmm0, %xmm0 234 ; SSE-NEXT: retq 235 ; 236 ; AVX-LABEL: combine_pshufb_psrldq: 237 ; AVX: # %bb.0: 238 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 239 ; AVX-NEXT: retq 240 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) 241 %2 = shufflevector <16 x i8> %1, <16 x i8> zeroinitializer, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 242 ret <16 x i8> %2 243 } 244 245 define <16 x i8> @combine_and_pshufb(<16 x i8> %a0) { 246 ; SSSE3-LABEL: combine_and_pshufb: 247 ; SSSE3: # %bb.0: 248 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 249 ; SSSE3-NEXT: retq 250 ; 251 ; SSE41-LABEL: combine_and_pshufb: 252 ; SSE41: # %bb.0: 253 ; SSE41-NEXT: pxor %xmm1, %xmm1 254 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7] 255 ; SSE41-NEXT: retq 256 ; 257 ; AVX-LABEL: combine_and_pshufb: 258 ; AVX: # %bb.0: 259 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 260 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7] 261 ; AVX-NEXT: retq 262 %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 263 %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 8, i8 9, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 264 ret <16 x i8> %2 265 } 266 267 define <16 x i8> @combine_pshufb_and(<16 x i8> %a0) { 268 ; SSSE3-LABEL: combine_pshufb_and: 269 ; SSSE3: # %bb.0: 270 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 271 ; SSSE3-NEXT: retq 272 ; 273 ; SSE41-LABEL: combine_pshufb_and: 274 ; SSE41: # %bb.0: 275 ; SSE41-NEXT: pxor %xmm1, %xmm1 276 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7] 277 ; SSE41-NEXT: retq 278 ; 279 ; AVX-LABEL: combine_pshufb_and: 280 ; AVX: # %bb.0: 281 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 282 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7] 283 ; AVX-NEXT: retq 284 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 8, i8 9, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 285 %2 = shufflevector <16 x i8> %1, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 286 ret <16 x i8> %2 287 } 288 289 define <16 x i8> @combine_pshufb_as_palignr(<16 x i8> %a0) { 290 ; SSE-LABEL: combine_pshufb_as_palignr: 291 ; SSE: # %bb.0: 292 ; SSE-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 293 ; SSE-NEXT: retq 294 ; 295 ; AVX-LABEL: combine_pshufb_as_palignr: 296 ; AVX: # %bb.0: 297 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 298 ; AVX-NEXT: retq 299 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 undef, i8 undef, i8 0>) 300 ret <16 x i8> %res0 301 } 302 303 define <16 x i8> @combine_pshufb_as_pslldq(<16 x i8> %a0) { 304 ; SSE-LABEL: combine_pshufb_as_pslldq: 305 ; SSE: # %bb.0: 306 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 307 ; SSE-NEXT: retq 308 ; 309 ; AVX-LABEL: combine_pshufb_as_pslldq: 310 ; AVX: # %bb.0: 311 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 312 ; AVX-NEXT: retq 313 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>) 314 ret <16 x i8> %res0 315 } 316 317 define <16 x i8> @combine_pshufb_as_psrldq(<16 x i8> %a0) { 318 ; SSE-LABEL: combine_pshufb_as_psrldq: 319 ; SSE: # %bb.0: 320 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 321 ; SSE-NEXT: retq 322 ; 323 ; AVX-LABEL: combine_pshufb_as_psrldq: 324 ; AVX: # %bb.0: 325 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 326 ; AVX-NEXT: retq 327 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) 328 ret <16 x i8> %res0 329 } 330 331 define <16 x i8> @combine_pshufb_as_psrlw(<16 x i8> %a0) { 332 ; SSE-LABEL: combine_pshufb_as_psrlw: 333 ; SSE: # %bb.0: 334 ; SSE-NEXT: psrlw $8, %xmm0 335 ; SSE-NEXT: retq 336 ; 337 ; AVX-LABEL: combine_pshufb_as_psrlw: 338 ; AVX: # %bb.0: 339 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 340 ; AVX-NEXT: retq 341 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 1, i8 128, i8 3, i8 128, i8 5, i8 128, i8 7, i8 128, i8 9, i8 128, i8 11, i8 128, i8 13, i8 128, i8 15, i8 128>) 342 ret <16 x i8> %res0 343 } 344 345 define <16 x i8> @combine_pshufb_as_pslld(<16 x i8> %a0) { 346 ; SSE-LABEL: combine_pshufb_as_pslld: 347 ; SSE: # %bb.0: 348 ; SSE-NEXT: pslld $24, %xmm0 349 ; SSE-NEXT: retq 350 ; 351 ; AVX-LABEL: combine_pshufb_as_pslld: 352 ; AVX: # %bb.0: 353 ; AVX-NEXT: vpslld $24, %xmm0, %xmm0 354 ; AVX-NEXT: retq 355 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 128, i8 128, i8 0, i8 128, i8 128, i8 128, i8 4, i8 128, i8 128, i8 128, i8 8, i8 128, i8 128, i8 128, i8 12>) 356 ret <16 x i8> %res0 357 } 358 359 define <16 x i8> @combine_pshufb_as_psrlq(<16 x i8> %a0) { 360 ; SSE-LABEL: combine_pshufb_as_psrlq: 361 ; SSE: # %bb.0: 362 ; SSE-NEXT: psrlq $40, %xmm0 363 ; SSE-NEXT: retq 364 ; 365 ; AVX-LABEL: combine_pshufb_as_psrlq: 366 ; AVX: # %bb.0: 367 ; AVX-NEXT: vpsrlq $40, %xmm0, %xmm0 368 ; AVX-NEXT: retq 369 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128>) 370 ret <16 x i8> %res0 371 } 372 373 define <16 x i8> @combine_pshufb_as_pshuflw(<16 x i8> %a0) { 374 ; SSE-LABEL: combine_pshufb_as_pshuflw: 375 ; SSE: # %bb.0: 376 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] 377 ; SSE-NEXT: retq 378 ; 379 ; AVX-LABEL: combine_pshufb_as_pshuflw: 380 ; AVX: # %bb.0: 381 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] 382 ; AVX-NEXT: retq 383 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>) 384 ret <16 x i8> %res0 385 } 386 387 define <16 x i8> @combine_pshufb_as_pshufhw(<16 x i8> %a0) { 388 ; SSE-LABEL: combine_pshufb_as_pshufhw: 389 ; SSE: # %bb.0: 390 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] 391 ; SSE-NEXT: retq 392 ; 393 ; AVX-LABEL: combine_pshufb_as_pshufhw: 394 ; AVX: # %bb.0: 395 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] 396 ; AVX-NEXT: retq 397 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>) 398 ret <16 x i8> %res0 399 } 400 401 define <16 x i8> @combine_pshufb_not_as_pshufw(<16 x i8> %a0) { 402 ; SSE-LABEL: combine_pshufb_not_as_pshufw: 403 ; SSE: # %bb.0: 404 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13] 405 ; SSE-NEXT: retq 406 ; 407 ; AVX-LABEL: combine_pshufb_not_as_pshufw: 408 ; AVX: # %bb.0: 409 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13] 410 ; AVX-NEXT: retq 411 %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>) 412 %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>) 413 ret <16 x i8> %res1 414 } 415 416 define <16 x i8> @combine_vpshufb_as_pshuflw_not_pslld(<16 x i8> *%a0) { 417 ; SSE-LABEL: combine_vpshufb_as_pshuflw_not_pslld: 418 ; SSE: # %bb.0: 419 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,2,2,4,5,6,7] 420 ; SSE-NEXT: retq 421 ; 422 ; AVX-LABEL: combine_vpshufb_as_pshuflw_not_pslld: 423 ; AVX: # %bb.0: 424 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,2,2,4,5,6,7] 425 ; AVX-NEXT: retq 426 %res0 = load <16 x i8>, <16 x i8> *%a0, align 16 427 %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 4, i8 5, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>) 428 ret <16 x i8> %res1 429 } 430 431 define <16 x i8> @combine_pshufb_as_unary_unpcklbw(<16 x i8> %a0) { 432 ; SSE-LABEL: combine_pshufb_as_unary_unpcklbw: 433 ; SSE: # %bb.0: 434 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 435 ; SSE-NEXT: retq 436 ; 437 ; AVX-LABEL: combine_pshufb_as_unary_unpcklbw: 438 ; AVX: # %bb.0: 439 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 440 ; AVX-NEXT: retq 441 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 undef, i8 undef, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 5, i8 5, i8 6, i8 6, i8 7, i8 7>) 442 ret <16 x i8> %1 443 } 444 445 define <16 x i8> @combine_pshufb_as_unary_unpckhwd(<16 x i8> %a0) { 446 ; SSE-LABEL: combine_pshufb_as_unary_unpckhwd: 447 ; SSE: # %bb.0: 448 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 449 ; SSE-NEXT: retq 450 ; 451 ; AVX-LABEL: combine_pshufb_as_unary_unpckhwd: 452 ; AVX: # %bb.0: 453 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 454 ; AVX-NEXT: retq 455 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 8, i8 9, i8 8, i8 9, i8 10, i8 11, i8 10, i8 11, i8 12, i8 13, i8 12, i8 13, i8 14, i8 15, i8 undef, i8 undef>) 456 ret <16 x i8> %1 457 } 458 459 define <8 x i16> @combine_pshufb_as_unpacklo_undef(<16 x i8> %a0) { 460 ; ALL-LABEL: combine_pshufb_as_unpacklo_undef: 461 ; ALL: # %bb.0: 462 ; ALL-NEXT: retq 463 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 2, i8 3, i8 undef, i8 undef, i8 4, i8 5, i8 undef, i8 undef, i8 6, i8 7>) 464 %2 = bitcast <16 x i8> %1 to <8 x i16> 465 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 466 ret <8 x i16> %3 467 } 468 469 define <16 x i8> @combine_pshufb_as_unpackhi_undef(<16 x i8> %a0) { 470 ; ALL-LABEL: combine_pshufb_as_unpackhi_undef: 471 ; ALL: # %bb.0: 472 ; ALL-NEXT: retq 473 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 8, i8 undef, i8 9, i8 undef, i8 10, i8 undef, i8 11, i8 undef, i8 12, i8 undef, i8 13, i8 undef, i8 14, i8 undef, i8 15, i8 undef>) 474 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15> 475 ret <16 x i8> %2 476 } 477 478 define <16 x i8> @combine_pshufb_as_unpacklo_zero(<16 x i8> %a0) { 479 ; SSE-LABEL: combine_pshufb_as_unpacklo_zero: 480 ; SSE: # %bb.0: 481 ; SSE-NEXT: xorps %xmm1, %xmm1 482 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 483 ; SSE-NEXT: movaps %xmm1, %xmm0 484 ; SSE-NEXT: retq 485 ; 486 ; AVX-LABEL: combine_pshufb_as_unpacklo_zero: 487 ; AVX: # %bb.0: 488 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 489 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 490 ; AVX-NEXT: retq 491 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 4, i8 5, i8 6, i8 7>) 492 ret <16 x i8> %1 493 } 494 495 define <16 x i8> @combine_pshufb_as_unpackhi_zero(<16 x i8> %a0) { 496 ; SSE-LABEL: combine_pshufb_as_unpackhi_zero: 497 ; SSE: # %bb.0: 498 ; SSE-NEXT: pxor %xmm1, %xmm1 499 ; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 500 ; SSE-NEXT: retq 501 ; 502 ; AVX-LABEL: combine_pshufb_as_unpackhi_zero: 503 ; AVX: # %bb.0: 504 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 505 ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 506 ; AVX-NEXT: retq 507 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 8, i8 -1, i8 9, i8 -1, i8 10, i8 -1, i8 11, i8 -1, i8 12, i8 -1, i8 13, i8 -1, i8 14, i8 -1, i8 15, i8 -1>) 508 ret <16 x i8> %1 509 } 510 511 define <16 x i8> @combine_psrlw_pshufb(<8 x i16> %a0) { 512 ; SSE-LABEL: combine_psrlw_pshufb: 513 ; SSE: # %bb.0: 514 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero 515 ; SSE-NEXT: retq 516 ; 517 ; AVX-LABEL: combine_psrlw_pshufb: 518 ; AVX: # %bb.0: 519 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero 520 ; AVX-NEXT: retq 521 %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 522 %2 = bitcast <8 x i16> %1 to <16 x i8> 523 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1>) 524 ret <16 x i8> %3 525 } 526 527 define <16 x i8> @combine_pslld_pshufb(<4 x i32> %a0) { 528 ; SSE-LABEL: combine_pslld_pshufb: 529 ; SSE: # %bb.0: 530 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,1,0],zero,xmm0[6,5,4],zero,xmm0[10,9,8],zero,xmm0[14,13,12],zero 531 ; SSE-NEXT: retq 532 ; 533 ; AVX-LABEL: combine_pslld_pshufb: 534 ; AVX: # %bb.0: 535 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,1,0],zero,xmm0[6,5,4],zero,xmm0[10,9,8],zero,xmm0[14,13,12],zero 536 ; AVX-NEXT: retq 537 %1 = shl <4 x i32> %a0, <i32 8, i32 8, i32 8, i32 8> 538 %2 = bitcast <4 x i32> %1 to <16 x i8> 539 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>) 540 ret <16 x i8> %3 541 } 542 543 define <16 x i8> @combine_psrlq_pshufb(<2 x i64> %a0) { 544 ; SSE-LABEL: combine_psrlq_pshufb: 545 ; SSE: # %bb.0: 546 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[7,6],zero,zero,zero,zero,zero,zero,xmm0[15,14] 547 ; SSE-NEXT: retq 548 ; 549 ; AVX-LABEL: combine_psrlq_pshufb: 550 ; AVX: # %bb.0: 551 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[7,6],zero,zero,zero,zero,zero,zero,xmm0[15,14] 552 ; AVX-NEXT: retq 553 %1 = lshr <2 x i64> %a0, <i64 48, i64 48> 554 %2 = bitcast <2 x i64> %1 to <16 x i8> 555 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>) 556 ret <16 x i8> %3 557 } 558 559 define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) { 560 ; SSE-LABEL: combine_unpckl_arg0_pshufb: 561 ; SSE: # %bb.0: 562 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero 563 ; SSE-NEXT: retq 564 ; 565 ; AVX-LABEL: combine_unpckl_arg0_pshufb: 566 ; AVX: # %bb.0: 567 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero 568 ; AVX-NEXT: retq 569 %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 570 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1>) 571 ret <16 x i8> %2 572 } 573 574 define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) { 575 ; SSE-LABEL: combine_unpckl_arg1_pshufb: 576 ; SSE: # %bb.0: 577 ; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero 578 ; SSE-NEXT: movdqa %xmm1, %xmm0 579 ; SSE-NEXT: retq 580 ; 581 ; AVX-LABEL: combine_unpckl_arg1_pshufb: 582 ; AVX: # %bb.0: 583 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero 584 ; AVX-NEXT: retq 585 %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 586 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1>) 587 ret <16 x i8> %2 588 } 589 590 define <8 x i16> @shuffle_combine_unpack_insert(<8 x i16> %a0) { 591 ; SSE-LABEL: shuffle_combine_unpack_insert: 592 ; SSE: # %bb.0: 593 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,8,9,8,9,8,9,10,11,10,11] 594 ; SSE-NEXT: retq 595 ; 596 ; AVX-LABEL: shuffle_combine_unpack_insert: 597 ; AVX: # %bb.0: 598 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,8,9,8,9,8,9,10,11,10,11] 599 ; AVX-NEXT: retq 600 %1 = extractelement <8 x i16> %a0, i32 2 601 %2 = extractelement <8 x i16> %a0, i32 4 602 %3 = insertelement <8 x i16> %a0, i16 %1, i32 4 603 %4 = insertelement <8 x i16> %a0, i16 %2, i32 2 604 %5 = shufflevector <8 x i16> %3, <8 x i16> %4, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 605 %6 = shufflevector <8 x i16> %5, <8 x i16> %3, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 606 %7 = shufflevector <8 x i16> %5, <8 x i16> %a0, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 607 %8 = shufflevector <8 x i16> %6, <8 x i16> %7, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 608 ret <8 x i16> %8 609 } 610 611 define <16 x i8> @shuffle_combine_packssdw_pshufb(<4 x i32> %a0) { 612 ; SSE-LABEL: shuffle_combine_packssdw_pshufb: 613 ; SSE: # %bb.0: 614 ; SSE-NEXT: psrad $31, %xmm0 615 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[13,12,9,8,5,4,1,0,13,12,9,8,5,4,1,0] 616 ; SSE-NEXT: retq 617 ; 618 ; AVX-LABEL: shuffle_combine_packssdw_pshufb: 619 ; AVX: # %bb.0: 620 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 621 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[13,12,9,8,5,4,1,0,13,12,9,8,5,4,1,0] 622 ; AVX-NEXT: retq 623 %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31> 624 %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %1) 625 %3 = bitcast <8 x i16> %2 to <16 x i8> 626 %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>) 627 ret <16 x i8> %4 628 } 629 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone 630 631 define <16 x i8> @shuffle_combine_packsswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) { 632 ; SSE-LABEL: shuffle_combine_packsswb_pshufb: 633 ; SSE: # %bb.0: 634 ; SSE-NEXT: psraw $15, %xmm0 635 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0] 636 ; SSE-NEXT: retq 637 ; 638 ; AVX-LABEL: shuffle_combine_packsswb_pshufb: 639 ; AVX: # %bb.0: 640 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 641 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0] 642 ; AVX-NEXT: retq 643 %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 644 %2 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 645 %3 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2) 646 %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) 647 ret <16 x i8> %4 648 } 649 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone 650 651 define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) { 652 ; SSE-LABEL: shuffle_combine_packuswb_pshufb: 653 ; SSE: # %bb.0: 654 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1] 655 ; SSE-NEXT: retq 656 ; 657 ; AVX-LABEL: shuffle_combine_packuswb_pshufb: 658 ; AVX: # %bb.0: 659 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1] 660 ; AVX-NEXT: retq 661 %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 662 %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 663 %3 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2) 664 %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) 665 ret <16 x i8> %4 666 } 667 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 668 669 define <16 x i8> @constant_fold_pshufb() { 670 ; SSE-LABEL: constant_fold_pshufb: 671 ; SSE: # %bb.0: 672 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9> 673 ; SSE-NEXT: retq 674 ; 675 ; AVX-LABEL: constant_fold_pshufb: 676 ; AVX: # %bb.0: 677 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9> 678 ; AVX-NEXT: retq 679 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6>) 680 ret <16 x i8> %1 681 } 682 683 ; FIXME - unnecessary pshufb/broadcast being used - pshufb mask only needs lowest byte. 684 define <16 x i8> @constant_fold_pshufb_2() { 685 ; SSE-LABEL: constant_fold_pshufb_2: 686 ; SSE: # %bb.0: 687 ; SSE-NEXT: movl $2, %eax 688 ; SSE-NEXT: movd %eax, %xmm0 689 ; SSE-NEXT: pxor %xmm1, %xmm1 690 ; SSE-NEXT: pshufb %xmm1, %xmm0 691 ; SSE-NEXT: retq 692 ; 693 ; AVX1-LABEL: constant_fold_pshufb_2: 694 ; AVX1: # %bb.0: 695 ; AVX1-NEXT: movl $2, %eax 696 ; AVX1-NEXT: vmovd %eax, %xmm0 697 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 698 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 699 ; AVX1-NEXT: retq 700 ; 701 ; AVX2-LABEL: constant_fold_pshufb_2: 702 ; AVX2: # %bb.0: 703 ; AVX2-NEXT: movl $2, %eax 704 ; AVX2-NEXT: vmovd %eax, %xmm0 705 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 706 ; AVX2-NEXT: retq 707 ; 708 ; AVX512F-LABEL: constant_fold_pshufb_2: 709 ; AVX512F: # %bb.0: 710 ; AVX512F-NEXT: movl $2, %eax 711 ; AVX512F-NEXT: vmovd %eax, %xmm0 712 ; AVX512F-NEXT: vpbroadcastb %xmm0, %xmm0 713 ; AVX512F-NEXT: retq 714 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 2, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>) 715 ret <16 x i8> %1 716 } 717 718 define i32 @mask_zzz3_v16i8(<16 x i8> %a0) { 719 ; SSSE3-LABEL: mask_zzz3_v16i8: 720 ; SSSE3: # %bb.0: 721 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[14,u,u,u,u,u,u,u,u,u,u,u,u] 722 ; SSSE3-NEXT: movd %xmm0, %eax 723 ; SSSE3-NEXT: retq 724 ; 725 ; SSE41-LABEL: mask_zzz3_v16i8: 726 ; SSE41: # %bb.0: 727 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,xmm0[14] 728 ; SSE41-NEXT: pextrd $3, %xmm0, %eax 729 ; SSE41-NEXT: retq 730 ; 731 ; AVX-LABEL: mask_zzz3_v16i8: 732 ; AVX: # %bb.0: 733 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,xmm0[14] 734 ; AVX-NEXT: vpextrd $3, %xmm0, %eax 735 ; AVX-NEXT: retq 736 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>) 737 %2 = bitcast <16 x i8> %1 to <4 x i32> 738 %3 = extractelement <4 x i32> %2, i32 3 739 %4 = and i32 %3, 4278190080 740 ret i32 %4 741 } 742 743 define i32 @mask_z1z3_v16i8(<16 x i8> %a0) { 744 ; SSSE3-LABEL: mask_z1z3_v16i8: 745 ; SSSE3: # %bb.0: 746 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[10],zero,xmm0[14,u,u,u,u,u,u,u,u,u,u,u,u] 747 ; SSSE3-NEXT: movd %xmm0, %eax 748 ; SSSE3-NEXT: retq 749 ; 750 ; SSE41-LABEL: mask_z1z3_v16i8: 751 ; SSE41: # %bb.0: 752 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,xmm0[10],zero,xmm0[14] 753 ; SSE41-NEXT: pextrd $3, %xmm0, %eax 754 ; SSE41-NEXT: retq 755 ; 756 ; AVX-LABEL: mask_z1z3_v16i8: 757 ; AVX: # %bb.0: 758 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,xmm0[10],zero,xmm0[14] 759 ; AVX-NEXT: vpextrd $3, %xmm0, %eax 760 ; AVX-NEXT: retq 761 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>) 762 %2 = bitcast <16 x i8> %1 to <4 x i32> 763 %3 = extractelement <4 x i32> %2, i32 3 764 %4 = and i32 %3, 4278255360 765 ret i32 %4 766 } 767 768 define i32 @PR22415(double %a0) { 769 ; SSE-LABEL: PR22415: 770 ; SSE: # %bb.0: 771 ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u] 772 ; SSE-NEXT: movd %xmm0, %eax 773 ; SSE-NEXT: retq 774 ; 775 ; AVX-LABEL: PR22415: 776 ; AVX: # %bb.0: 777 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u] 778 ; AVX-NEXT: vmovd %xmm0, %eax 779 ; AVX-NEXT: retq 780 %1 = bitcast double %a0 to <8 x i8> 781 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 undef> 782 %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2> 783 %4 = bitcast <3 x i8> %3 to i24 784 %5 = zext i24 %4 to i32 785 ret i32 %5 786 } 787