; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AMD10H
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2

; Codegen test for the SSE4A extrq/insertq immediate forms. It contains direct
; calls to the llvm.x86.sse4a.extrqi / insertqi intrinsics and shufflevector
; patterns, many of which are expected to be emitted as extrq or insertq.
; The three llc invocations at the top pair +sse4a with +sse3, +ssse3 and +avx
; respectively; each has its own check prefix in addition to the shared one.

;
; EXTRQI
;

; A length of zero is equivalent to a bit length of 64.
; Calls extrqi on %a with length 0 and index 0. The expected output for all
; three runs is a bare retq: no extrq is emitted and xmm0 is left untouched.
define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
; ALL-LABEL: extrqi_len0_idx0:
; ALL: # %bb.0:
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
  ret <2 x i64> %1
}

; Calls extrqi with length 8 and index 16 (bit counts, going by the comments in
; this file). The expected extrq decode places byte 2 of xmm0 in the low byte,
; zeroes the other seven bytes of the low quadword and leaves the upper
; quadword undefined.
define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
; ALL-LABEL: extrqi_len8_idx16:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits the result is undefined.
; Calls extrqi with length 32 and index 48 (32 + 48 = 80, past the low 64 bits).
; An extrq is still expected, but its decode comment lists every byte of xmm0
; as undefined.
define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
; ALL-LABEL: extrqi_len32_idx48:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
  ret <2 x i64> %1
}

; v16i8 shuffle: byte 0 of %a0 followed by three zero bytes (mask index 16
; selects from the zeroinitializer operand); the remaining lanes are undef.
; The +sse3 and +ssse3 runs expect a single extrq; the +avx run expects
; vpmovzxbq instead.
define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_0zzzuuuuuuuuuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; v16i8 shuffle that zero-extends bytes 0 and 1 of %a0 into the low and high
; quadwords. Expected lowering: +sse3 uses two extrq joined by punpcklqdq,
; +ssse3 uses a single pshufb, +avx uses vpmovzxbq.
define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_0zzzzzzz1zzzzzzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Same layout as the previous test but sourcing bytes 2 and 3 of %a0.
; Expected lowering: +sse3 uses two extrq plus punpcklqdq, +ssse3 uses pshufb,
; +avx shifts right by 16 bits (vpsrld) and then uses vpmovzxbq.
define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_2zzzzzzz3zzzzzzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrld $16, %xmm0, %xmm0
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; v16i8 shuffle: bytes 0-1 of %a0 followed by two zero bytes, rest undef.
; The +sse3 and +ssse3 runs expect a single extrq; the +avx run expects
; vpmovzxwq.
define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_01zzuuuuuuuuuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; v16i8 shuffle that puts bytes 0-1 of %a0 (zero-padded) in the low quadword
; and bytes 2-3 (zero-padded) in the high quadword. Expected lowering: +sse3
; uses two extrq plus punpcklqdq, +ssse3 uses pshufb, +avx uses vpmovzxwq.
define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_01zzzzzz23zzzzzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; v16i8 shuffle: byte 1 of %a0 followed by three zero bytes, rest undef.
; All three runs expect a single extrq.
define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; v8i16 shuffle: word 1 of %a0 followed by three zero words (mask index 8
; selects from the zeroinitializer operand), upper half undef. All three runs
; expect an extrq that keeps bytes 2-3.
define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_1zzzuuuu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle: words 1-2 of %a0 followed by two zero words, upper half undef.
; All three runs expect an extrq that keeps bytes 2-5.
define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_12zzuuuu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle: words 0-2 of %a0 followed by one zero word, upper half undef.
; The +sse3 and +ssse3 runs expect an extrq that keeps bytes 0-5; the +avx run
; expects vpxor plus vpblendw instead.
define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
; AMD10H-LABEL: shuf_012zuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_012zuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_012zuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; BTVER2-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle that zero-extends words 0 and 1 of %a0 into the low and high
; quadwords. Expected lowering: +sse3 uses two extrq plus punpcklqdq, +ssse3
; uses pshufb, +avx uses vpmovzxwq.
define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
; AMD10H-LABEL: shuf_0zzz1zzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0zzz1zzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzz1zzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
  ret <8 x i16> %s
}

; v4i32 shuffle interleaving dwords 0 and 1 of %a0 with zero. No run expects
; extrq here: +sse3 and +ssse3 expect xorps plus unpcklps, +avx expects
; vpmovzxdq.
define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; AMD10H-LABEL: shuf_0z1z:
; AMD10H: # %bb.0:
; AMD10H-NEXT: xorps %xmm1, %xmm1
; AMD10H-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0z1z:
; BTVER1: # %bb.0:
; BTVER1-NEXT: xorps %xmm1, %xmm1
; BTVER1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0z1z:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BTVER2-NEXT: retq
  %s = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %s
}

;
; INSERTQI
;

; A length of zero is equivalent to a bit length of 64.
; Calls insertqi on %a and %b with length 0 and index 0. No insertq is
; expected: every run expects just a register move of xmm1 into xmm0 (movaps,
; or vmovaps in the +avx run).
define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
; AMD10H-LABEL: insertqi_len0_idx0:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movaps %xmm1, %xmm0
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: insertqi_len0_idx0:
; BTVER1: # %bb.0:
; BTVER1-NEXT: movaps %xmm1, %xmm0
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: insertqi_len0_idx0:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps %xmm1, %xmm0
; BTVER2-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
  ret <2 x i64> %1
}

; Calls insertqi with length 8 and index 16. The expected insertq decode
; replaces byte 2 of xmm0 with byte 0 of xmm1, keeps the other low-quadword
; bytes of xmm0 and leaves the upper quadword undefined.
define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len8_idx16:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits the result is undefined
; Here length 32 + index 48 = 80. An insertq is still expected, but its decode
; comment lists every byte of xmm0 as undefined.
define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len32_idx48:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
  ret <2 x i64> %1
}

; v16i8 shuffle whose mask only references %a0: byte 0 is duplicated into
; position 1, bytes 2-3 stay in place, the rest is undef. All runs expect one
; insertq whose decode shows xmm0[0,0,2,3,...].
define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; v16i8 shuffle that replaces byte 1 of %a0 with byte 0 of %a1 (mask index 16).
; All runs expect a single insertq.
define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; v16i8 shuffle that replaces byte 0 of %a0 with byte 0 of %a1 (mask index 16).
; All runs expect a single insertq.
define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; v8i16 shuffle that replaces word 1 of %a0 with word 0 of %a1 (mask index 8).
; All runs expect a single insertq.
define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0823uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle that replaces word 2 of %a0 with word 0 of %a1.
; All runs expect a single insertq.
define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0183uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle that replaces word 3 of %a0 with word 0 of %a1.
; All runs expect a single insertq.
define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0128uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle that replaces words 1-2 of %a0 with words 0-1 of %a1.
; All runs expect a single insertq.
define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0893uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle that replaces words 1-3 of %a0 with words 0-2 of %a1.
; All runs expect a single insertq.
define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089Auuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; v8i16 shuffle: word 0 of %a0, then words 0-1 of %a1, with word 3 left undef.
; All runs expect a single insertq; its decode fills the undef word with bytes
; 6-7 of xmm0.
define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089uuuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

;
; Special Cases
;

; Out of range.
; v16i8 shuffle: byte 8 of %a then byte 2 of %b (mask index 18), rest undef.
; No run expects extrq or insertq. The +sse3 run expects a
; movsd/andpd/pshufd/pshuflw/packuswb sequence; the +ssse3 and +avx runs expect
; a 16-bit right shift of xmm1, a pshufd of xmm0 and a punpcklbw (v-prefixed
; forms in the +avx run).
define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
; AMD10H-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AMD10H-NEXT: andpd {{.*}}(%rip), %xmm0
; AMD10H-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AMD10H-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
; AMD10H-NEXT: packuswb %xmm0, %xmm0
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: psrld $16, %xmm1
; BTVER1-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER1-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrld $16, %xmm1, %xmm1
; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER2-NEXT: retq
  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; v16i8 shuffle: bytes 0 and 5 of %v land in positions 1 and 2, every other
; lane is undef. No run expects extrq. The +sse3 run expects
; punpcklbw/pshufd/pshuflw; the +ssse3 and +avx runs expect a single pshufb
; (vpshufb in the +avx run).
define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; AMD10H-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AMD10H-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AMD10H-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER2-NEXT: retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; v16i8 shuffle: position 0 undef, then zero, byte 4 of %v, zero (mask index 16
; selects from the zeroinitializer operand); the rest is undef. No run expects
; extrq. The +sse3 run expects a 16-bit psrlq followed by a pand with a
; rip-relative memory operand; the +ssse3 and +avx runs expect a single pshufb
; (vpshufb in the +avx run).
define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; AMD10H-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: psrlq $16, %xmm0
; AMD10H-NEXT: pand {{.*}}(%rip), %xmm0
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER2-NEXT: retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; v16i8 shuffle: positions 0-1 undef, byte 4 of %v at position 2, zero at
; position 3, rest undef. All three runs expect a single extrq whose decode
; keeps bytes 2-4 of xmm0 and zeroes the remainder of the low quadword.
define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Declarations of the two SSE4A intrinsics called by the intrinsic tests in
; this file. Going by the test names, the i8 operands are (length, index).
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind