; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2

; Codegen expectations for 128-bit <16 x i8> shuffles under the experimental
; x86 vector shuffle lowering. Every function applies a single shufflevector
; mask; the directives at the top of each body pin the instruction sequence
; that llc is expected to print. Mask indices 0-15 select bytes of the first
; operand and 16-31 select bytes of the second operand.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Splat: every result lane takes byte 0 of %a.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i8> %res
}

; Lanes 0-7 take byte 0 of %a, lanes 8-15 take byte 1 of %a.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %res
}

; Lanes 0-7 take byte 0 of %a, lanes 8-15 take byte 8 of %a.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i8> %res
}

; Bytes 0..3 of %a, each repeated four times.
define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: punpcklwd %xmm0, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i8> %res
}

; Bytes 4..7 of %a, each repeated four times.
define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
  ret <16 x i8> %res
}

; Bytes 0, 4, 8 and 12 of %a, each repeated four times.
define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i8> %res
}

; Bytes 0..7 of %a, each duplicated into two adjacent lanes.
define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  ret <16 x i8> %res
}

; The byte pair (0, 1) of %a repeated across all eight 2-byte positions.
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %res
}

; Interleave: even lanes take bytes 0..7 of %a, odd lanes take bytes 0..7 of %b.
define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23
; CHECK-SSE2: punpcklbw %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %res
}

; Even lanes all take byte 0 of %b (index 16); odd lanes take bytes 0..7 of %a.
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm1
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
  ret <16 x i8> %res
}

; Reverse the byte order inside each 4-byte group of %a.
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
; CHECK-SSE2: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: packuswb %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i8> %res
}

; Byte-reversed 4-byte groups: the first two groups of %a followed by the
; first two groups of %b.
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
; CHECK-SSE2: pxor %xmm2, %xmm2
; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm0
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
  ret <16 x i8> %res
}

; Byte-reversed 4-byte groups drawn alternately from both inputs:
; %a group 0, %b group 3, %a group 2, %b group 1.
define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
; CHECK-SSE2: pxor %xmm2, %xmm2
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm3
; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm3
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
; CHECK-SSE2-NEXT: punpckhbw %xmm2, %xmm4
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1]
; CHECK-SSE2-NEXT: punpckhbw %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm0
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; CHECK-SSE2-NEXT: packuswb %xmm4, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
  ret <16 x i8> %res
}

; Even lanes take bytes 0..7 of %a; odd lanes select from the all-zero second
; operand, so each source byte is followed by a zero byte.
define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle
; CHECK-SSE2: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
  %res = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
  ret <16 x i8> %res
}

; Every fourth lane takes bytes 0..3 of %a; the three lanes after each one
; select from the all-zero second operand.
define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle
; CHECK-SSE2: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
  %res = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Lanes 0..3 take bytes 0, 4, 8 and 12 of %a; the remaining twelve lanes are
; undef.
define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
; CHECK-SSE2-LABEL: @trunc_v4i32_shuffle
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pand
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
; CHECK-SSE2-NEXT: retq
  %res = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %res
}