; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2

;
; EXTRQI
;

; A length of zero is equivalent to a bit length of 64.
; With index 0 the expected output keeps bytes 0-7 of xmm0 and leaves the
; upper eight bytes undef.
define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
; ALL-LABEL: extrqi_len0_idx0:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %res = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
  ret <2 x i64> %res
}

; Length 8 at index 16: byte 2 is expected in lane 0, bytes 1-7 are zero and
; the upper eight bytes are undef.
define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
; ALL-LABEL: extrqi_len8_idx16:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %res = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
  ret <2 x i64> %res
}

; If the length + index exceeds the bottom 64 bits the result is undefined.
; Length 32 at index 48 (32 + 48 > 64): every byte of the expected result is
; undef.
define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
; ALL-LABEL: extrqi_len32_idx48:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %res = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
  ret <2 x i64> %res
}

; Byte 0 followed by three zero bytes, remaining lanes undef.
; The SSSE3 configuration expects one EXTRQ; the AVX configuration expects a
; VPMOVZXBQ zero-extension instead.
define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER1: # BB#0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; Bytes 0 and 1 each zero-extended to 64 bits.
; The SSSE3 configuration expects two EXTRQs (one on a copy of the input)
; joined by PUNPCKLQDQ; the AVX configuration expects a single VPMOVZXBQ.
define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER1: # BB#0:
; BTVER1-NEXT: movaps %xmm0, %xmm1
; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %shuf
}

; Same zero-extension pattern starting at byte 2 (bytes 2 and 3).
; The AVX configuration expects a 16-bit right shift before the VPMOVZXBQ.
define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER1: # BB#0:
; BTVER1-NEXT: movaps %xmm0, %xmm1
; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsrld $16, %xmm0, %xmm0
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %shuf
}

; Bytes 0-1 kept, bytes 2-3 zero, remaining lanes undef.
; Expected as EXTRQ without AVX and as VPMOVZXWQ with AVX.
define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER1: # BB#0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; Byte pairs 0-1 and 2-3 each zero-extended to 64 bits.
; Two EXTRQs plus PUNPCKLQDQ without AVX, a single VPMOVZXWQ with AVX.
define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER1: # BB#0:
; BTVER1-NEXT: movaps %xmm0, %xmm1
; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %shuf
}

; Byte 1 moved to lane 0 followed by three zero bytes, remaining lanes undef.
; Both configurations expect a single EXTRQ.
define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; i16 element 1 followed by three zero elements, upper half undef.
; Both configurations expect an EXTRQ of bytes 2-3.
define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_1zzzuuuu:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; i16 elements 1 and 2 followed by two zero elements, upper half undef.
; Both configurations expect an EXTRQ of bytes 2-5.
define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_12zzuuuu:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; i16 elements 0-2 followed by one zero element, upper half undef.
; Both configurations expect an EXTRQ of bytes 0-5.
define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_012zuuuu:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; i16 elements 0 and 1 each zero-extended to 64 bits.
; Two EXTRQs plus PUNPCKLQDQ without AVX, a single VPMOVZXWQ with AVX.
define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
; BTVER1-LABEL: shuf_0zzz1zzz:
; BTVER1: # BB#0:
; BTVER1-NEXT: movaps %xmm0, %xmm1
; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzz1zzz:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
  ret <8 x i16> %shuf
}

; i32 elements 0 and 1 each zero-extended to 64 bits.
; No EXTRQ is expected here: PXOR + PUNPCKLDQ without AVX, VPMOVZXDQ with AVX.
define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; BTVER1-LABEL: shuf_0z1z:
; BTVER1: # BB#0:
; BTVER1-NEXT: pxor %xmm1, %xmm1
; BTVER1-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0z1z:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BTVER2-NEXT: retq
  %shuf = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %shuf
}

;
; INSERTQI
;

; A length of zero is equivalent to a bit length of 64.
; Length 0 at index 0: the expected output takes bytes 0-7 from xmm1 and
; leaves the upper eight bytes undef.
define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len0_idx0:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6,7],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %res = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
  ret <2 x i64> %res
}

; Length 8 at index 16: byte 0 of xmm1 is expected at byte 2, with the other
; low bytes kept from xmm0 and the upper eight bytes undef.
define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len8_idx16:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %res = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
  ret <2 x i64> %res
}

; If the length + index exceeds the bottom 64 bits the result is undefined.
; Length 32 at index 48 (32 + 48 > 64): every expected result byte is undef.
define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len32_idx48:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %res = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
  ret <2 x i64> %res
}

; Mask reads only %a0 and duplicates byte 0 into lane 1; bytes 2-3 stay in
; place and the rest is undef. A single INSERTQ with xmm0 as its only source
; is expected.
define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; Byte 0 of the second operand inserted at lane 1 of the first.
define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; Byte 0 of the second operand inserted at lane 0 of the first.
define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; i16 element 0 of the second operand inserted at element 1 of the first.
define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0823uuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; i16 element 0 of the second operand inserted at element 2 of the first.
define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0183uuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; i16 element 0 of the second operand inserted at element 3 of the first.
define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0128uuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; i16 elements 0-1 of the second operand inserted at elements 1-2 of the first.
define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0893uuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; i16 elements 0-2 of the second operand inserted at elements 1-3 of the first.
define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089Auuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; Like shuf_0893uuuu but with element 3 undef in the mask; the expected
; INSERTQ is the same and fills that lane from xmm0.
define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089uuuuu:
; ALL: # BB#0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

;
; Special Cases
;

; Out of range.
; Lane 0 reads byte 8 of %a (upper quadword) and lane 1 reads byte 2 of %b.
; Neither configuration is expected to use EXTRQ/INSERTQ here; the expected
; sequence is a shift, a dword shuffle and a byte unpack.
define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER1: # BB#0:
; BTVER1-NEXT: psrld $16, %xmm1
; BTVER1-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER1-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsrld $16, %xmm1, %xmm1
; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; Only lanes 1 and 2 are defined (bytes 0 and 5 of %v).
; Both configurations expect a single byte shuffle (PSHUFB / VPSHUFB).
define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1: # BB#0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; Lanes 1 and 3 are zero and lane 2 is byte 4 of %v; everything else is undef.
; Both configurations expect a byte shuffle with zeroing, not EXTRQ.
define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1: # BB#0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER2-NEXT: retq
  %shuf = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; Lane 2 is byte 4 of %v and lane 3 is zero; lanes 0-1 are undef.
; Both configurations expect an EXTRQ of bytes 2-4 followed by zeros, with the
; undef leading lanes filled by bytes 2 and 3.
define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %shuf = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuf
}

; SSE4A intrinsic declarations for the extrqi_* and insertqi_* tests.
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind