; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefixes=ALL,KNL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefixes=ALL,SKX %s

; Codegen tests for <32 x i16> shufflevector patterns. Every function is
; checked twice: once with -mcpu=knl (KNL prefix; the expected code works on
; ymm registers, two per <32 x i16> value) and once with -mcpu=skx (SKX
; prefix; the expected code mostly works on a single zmm register per value).
;
; NOTE(review): both llc invocations pass -mtriple=x86_64-apple-darwin while
; the module below declares x86_64-unknown-unknown; the '##' comment markers
; in the assertions suggest the command-line triple is the one in effect -
; confirm before changing either.

target triple = "x86_64-unknown-unknown"

; Splat: the all-zero mask copies element 0 of %a into all 32 result lanes.
define <32 x i16> @shuffle_v32i16(<32 x i16> %a) {
; KNL-LABEL: shuffle_v32i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16:
; SKX: ## %bb.0:
; SKX-NEXT: vpbroadcastw %xmm0, %zmm0
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %c
}

; Splat of element 8 of %a into all 32 result lanes. Both expected sequences
; first extract the upper 128 bits of ymm0 and then broadcast its low word.
define <32 x i16> @shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<32 x i16> %a) {
; KNL-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
; SKX: ## %bb.0:
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpbroadcastw %xmm0, %zmm0
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <32 x i16> %c
}

; Single-input shuffle with several undef lanes. The two 16-lane halves of the
; mask are identical except for the very last lane, which selects element 31
; instead of element 1. The SKX assertions expect one vpermw with the mask as
; a constant; the KNL assertions expect a vpshufb/vpermq/vpblendvb sequence.
define <32 x i16> @shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f(<32 x i16> %a) {
; KNL-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
; KNL: ## %bb.0:
; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[4,5,10,11,4,5,6,7,14,15,2,3,4,5,2,3,20,21,26,27,20,21,22,23,30,31,18,19,20,21,18,19]
; KNL-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm3[0,1,10,11,8,9,8,9,14,15,2,3,4,5,2,3,16,17,26,27,24,25,24,25,30,31,18,19,20,21,18,19]
; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = <0,0,0,0,u,u,u,u,0,0,u,u,255,255,0,0,255,255,255,255,u,u,255,255,255,255,u,u,0,0,255,255>
; KNL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,1,10,11,8,9,8,9,14,15,6,7,4,5,14,15,16,17,26,27,24,25,24,25,30,31,22,23,20,21,30,31]
; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = <255,255,255,255,u,u,u,u,255,255,u,u,0,0,255,255,0,0,0,0,u,u,0,0,0,0,u,u,255,255,u,u>
; KNL-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,u,u,255,255,u,u,255,255,255,255,255,255,255,255,u,u,255,255,255,255,u,u,255,255,0,0>
; KNL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
; SKX: ## %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1,2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,31>
; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1, i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 31>
  ret <32 x i16> %c
}

; Two-operand shuffle. All mask indices except the last are below 32 and so
; select from %a; only the final index (56) selects from %b (its element 24).
; The two 16-lane halves of the mask differ only in that last lane (24 vs 56).
; NOTE(review): if the function name is read as hexadecimal lane indices, its
; 8th entry (1e = 30) does not match the mask, which uses 28 in that lane; the
; SKX constant in the assertions also shows 28, i.e. it follows the mask.
define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
; KNL: ## %bb.0:
; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
; KNL-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6],ymm2[7],ymm1[8,9,10,11],ymm2[12,13],ymm1[14],ymm2[15]
; KNL-NEXT: vpshufb {{.*#+}} ymm1 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u]
; KNL-NEXT: vpermq {{.*#+}} ymm4 = ymm0[2,3,0,1]
; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm4[5,6,7],ymm0[8,9,10,11,12],ymm4[13,14,15]
; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u]
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3
; KNL-NEXT: vpbroadcastw %xmm3, %ymm3
; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; KNL-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1
; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17]
; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
; SKX: ## %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24,15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,56]
; SKX-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24, i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 56>
  ret <32 x i16> %c
}

; Interleaves elements 0-3 and 8-11 of %a with the same elements of %b
; (indices 32+n select element n of %b); result lanes 16-31 are undef, and
; the expected code is a single 256-bit vpunpcklwd for both CPUs.
; NOTE(review): the name says v16i32 although the operands are <32 x i16>.
define <32 x i16> @shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
; KNL: ## %bb.0:
; KNL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11]
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
; SKX: ## %bb.0:
; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i16> %c
}

; High-half counterpart of the previous test: interleaves elements 4-7 and
; 12-13 of %a with the same elements of %b, remaining lanes undef. The
; expected code is a single 256-bit vpunpckhwd for both CPUs.
; NOTE(review): the name says v16i32 although the operands are <32 x i16>.
define <32 x i16> @shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
; KNL: ## %bb.0:
; KNL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15]
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
; SKX: ## %bb.0:
; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i16> %c
}

; Odd elements of %a land in the even result lanes; every odd result lane
; uses index 34, which selects from the zeroinitializer second operand. The
; expected code is a 16-bit logical right shift of each 32-bit element.
; %b is declared but not used by the body.
define <32 x i16> @shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z:
; KNL: ## %bb.0:
; KNL-NEXT: vpsrld $16, %ymm0, %ymm0
; KNL-NEXT: vpsrld $16, %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z:
; SKX: ## %bb.0:
; SKX-NEXT: vpsrld $16, %zmm0, %zmm0
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 34, i32 3, i32 34, i32 5, i32 34, i32 7, i32 34, i32 9, i32 34, i32 11, i32 34, i32 13, i32 34, i32 15, i32 34, i32 17, i32 34, i32 19, i32 34, i32 21, i32 34, i32 23, i32 34, i32 25, i32 34, i32 27, i32 34, i32 29, i32 34, i32 31, i32 34>
  ret <32 x i16> %c
}

; Mirror of the previous test: even result lanes are zero (index 34 selects
; from the zeroinitializer operand) and even elements of %a land in the odd
; result lanes. The expected code is a 16-bit left shift of each 32-bit
; element. %b is declared but not used by the body.
define <32 x i16> @shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30:
; KNL: ## %bb.0:
; KNL-NEXT: vpslld $16, %ymm0, %ymm0
; KNL-NEXT: vpslld $16, %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30:
; SKX: ## %bb.0:
; SKX-NEXT: vpslld $16, %zmm0, %zmm0
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 34, i32 0, i32 34, i32 2, i32 34, i32 4, i32 34, i32 6, i32 34, i32 8, i32 34, i32 10, i32 34, i32 12, i32 34, i32 14, i32 34, i32 16, i32 34, i32 18, i32 34, i32 20, i32 34, i32 22, i32 34, i32 24, i32 34, i32 26, i32 34, i32 28, i32 34, i32 30>
  ret <32 x i16> %c
}

; Within every group of eight elements the low four are permuted as
; [1,1,0,0] and the high four stay in place; the expected code is vpshuflw.
; All mask indices are below 32, so the zeroinitializer operand is never
; selected. %b is declared but not used by the body.
define <32 x i16> @shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31:
; KNL: ## %bb.0:
; KNL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
; KNL-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31:
; SKX: ## %bb.0:
; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15,17,17,16,16,20,21,22,23,25,25,24,24,28,29,30,31]
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i16> %c
}

; Within every group of eight elements the low four stay in place and the
; high four are permuted as [5,5,4,4]; the expected code is vpshufhw.
; All mask indices are below 32, so the zeroinitializer operand is never
; selected. %b is declared but not used by the body.
define <32 x i16> @shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28:
; KNL: ## %bb.0:
; KNL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
; KNL-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28:
; SKX: ## %bb.0:
; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12,16,17,18,19,21,21,20,20,24,25,26,27,29,29,28,28]
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
  ret <32 x i16> %c
}

; Combination of the two previous patterns: in every group of eight elements
; the low four become [1,1,0,0] and the high four become [5,5,4,4]. The KNL
; assertions expect vpshuflw followed by vpshufhw per ymm half; the SKX
; assertions expect a single 512-bit vpshufb.
; NOTE(review): the function name spells 9_9_11_11, 17_17_19_19 and
; 25_25_27_27, but the mask selects 9,9,8,8 / 17,17,16,16 / 25,25,24,24. The
; assertions follow the mask, not the name.
; %b is declared but not used by the body.
define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28:
; KNL: ## %bb.0:
; KNL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
; KNL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
; KNL-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
; KNL-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28:
; SKX: ## %bb.0:
; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[2,3,2,3,0,1,0,1,10,11,10,11,8,9,8,9,18,19,18,19,16,17,16,17,26,27,26,27,24,25,24,25,34,35,34,35,32,33,32,33,42,43,42,43,40,41,40,41,50,51,50,51,48,49,48,49,58,59,58,59,56,57,56,57]
; SKX-NEXT: retq
  %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4, i32 9, i32 9, i32 8, i32 8, i32 13, i32 13, i32 12, i32 12, i32 17, i32 17, i32 16, i32 16, i32 21, i32 21, i32 20, i32 20, i32 25, i32 25, i32 24, i32 24, i32 29, i32 29, i32 28, i32 28>
  ret <32 x i16> %c
}

; Keeps element 0 of %a in lane 0 and zeroes the other 31 lanes (index 32
; selects element 0 of the zeroinitializer operand). The KNL assertions expect
; an AND with a 0xFFFF mask plus a zeroed upper half; the SKX assertions
; expect a zero-masked vmovdqu16 with mask value 1.
define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) {
; KNL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; KNL: ## %bb.0:
; KNL-NEXT: movl $65535, %eax ## imm = 0xFFFF
; KNL-NEXT: vmovd %eax, %xmm1
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; SKX: ## %bb.0:
; SKX-NEXT: movl $1, %eax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i16> %shuffle
}

; Loads an i32 from %ptr, inserts it into lane 0 of a zero <4 x i32>,
; reinterprets that as <8 x i16> and splats i16 element 0 into 32 lanes.
; The KNL assertions expect the load folded into vpbroadcastw; the SKX
; assertions expect a scalar load followed by a GPR broadcast.
define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
; KNL-LABEL: insert_dup_mem_v32i16_i32:
; KNL: ## %bb.0:
; KNL-NEXT: vpbroadcastw (%rdi), %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_mem_v32i16_i32:
; SKX: ## %bb.0:
; SKX-NEXT: movl (%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %tmp3
}

; Same splat as above, but the source is an i16 load that is sign-extended to
; i32 before being inserted; both expected sequences start with movswl.
define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
; KNL: ## %bb.0:
; KNL-NEXT: movswl (%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
; SKX: ## %bb.0:
; SKX-NEXT: movswl (%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %tmp4
}

; Loads an i32 into lane 0 of a zero <4 x i32> and splats i16 element 1 of
; the bitcast vector; both expected sequences read 16 bits at offset 2 from
; %ptr.
; NOTE(review): attribute group #0 is referenced here but no
; 'attributes #0 = ...' definition is visible in this part of the file.
define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(i32* %ptr) #0 {
; KNL-LABEL: insert_dup_elt1_mem_v32i16_i32:
; KNL: ## %bb.0:
; KNL-NEXT: vpbroadcastw 2(%rdi), %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
; SKX: ## %bb.0:
; SKX-NEXT: movzwl 2(%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i16> %tmp3
}

; Variant of the previous test: the i32 is inserted into lane 1 and i16
; element 3 of the bitcast vector is splatted. The expected sequences again
; read 16 bits at offset 2 from %ptr.
; NOTE(review): attribute group #0 is referenced here but no
; 'attributes #0 = ...' definition is visible in this part of the file.
define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 {
; KNL-LABEL: insert_dup_elt3_mem_v32i16_i32:
; KNL: ## %bb.0:
; KNL-NEXT: vpbroadcastw 2(%rdi), %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
; SKX: ## %bb.0:
; SKX-NEXT: movzwl 2(%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <32 x i16> %tmp3
}

; Here the FIRST shuffle operand is zeroinitializer, so index 0 yields zero
; and indices 32-39 select elements 0-7 of %a. Each selected element is
; followed by three zero lanes; the expected code is vpmovzxwq.
define <32 x i16> @shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i16> %a) {
; KNL-LABEL: shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX-NEXT: retq
  %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 36, i32 0, i32 0, i32 0, i32 37, i32 0, i32 0, i32 0, i32 38, i32 0, i32 0, i32 0, i32 39, i32 0, i32 0, i32 0>
  ret <32 x i16> %shuffle
}

; Same operand order as the previous test (zero vector first): indices 32-47
; select elements 0-15 of %a, each followed by one zero lane. The expected
; code is vpmovzxwd.
define <32 x i16> @shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz(<32 x i16> %a) {
; KNL-LABEL: shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; SKX-NEXT: retq
  %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 32, i32 0, i32 33, i32 0, i32 34, i32 0, i32 35, i32 0, i32 36, i32 0, i32 37, i32 0, i32 38, i32 0, i32 39, i32 0, i32 40, i32 0, i32 41, i32 0, i32 42, i32 0, i32 43, i32 0, i32 44, i32 0, i32 45, i32 0, i32 46, i32 0, i32 47, i32 0>
  ret <32 x i16> %shuffle
}

; Narrowing shuffle: gathers elements 1, 5, 9, ..., 29 of %v into an
; <8 x i16> result. The SKX assertions expect vpermi2w on the two ymm halves
; followed by vzeroupper; the KNL assertions expect a
; vpshufd/vpshuflw/vpunpckldq sequence.
; NOTE(review): presumably a regression test for bug report 32967, judging
; only by the function name - confirm against the bug tracker.
define <8 x i16> @pr32967(<32 x i16> %v) {
; KNL-LABEL: pr32967:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7]
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7]
; KNL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7]
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
; KNL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; KNL-NEXT: retq
;
; SKX-LABEL: pr32967:
; SKX: ## %bb.0:
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u>
; SKX-NEXT: vpermi2w %ymm2, %ymm0, %ymm1
; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %shuffle = shufflevector <32 x i16> %v, <32 x i16> undef, <8 x i32> <i32 1,i32 5,i32 9,i32 13,i32 17,i32 21,i32 25,i32 29>
  ret <8 x i16> %shuffle
}

; Zero vector is the first operand again: every odd result lane is zero and
; the even result lanes take odd elements of %a in descending order within
; each group of eight; the expected code is vpshufb with zeroing.
; NOTE(review): the function name ends in ..._27_zz_25_zz, but the last
; non-zero mask index is 58 (element 26 of %a), not 57 (element 25). The
; assertions follow the mask: the final byte pair is ymm1[20,21] in the KNL
; output and zmm0[52,53] in the SKX output. Regenerate the assertions if the
; mask is ever changed to match the name.
define <32 x i16> @shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz(<32 x i16> %a) {
; KNL-LABEL: shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz:
; KNL: ## %bb.0:
; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15],zero,zero,ymm0[10,11],zero,zero,ymm0[6,7],zero,zero,ymm0[2,3],zero,zero,ymm0[30,31],zero,zero,ymm0[26,27],zero,zero,ymm0[22,23],zero,zero,ymm0[18,19],zero,zero
; KNL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[14,15],zero,zero,ymm1[10,11],zero,zero,ymm1[6,7],zero,zero,ymm1[2,3],zero,zero,ymm1[30,31],zero,zero,ymm1[26,27],zero,zero,ymm1[22,23],zero,zero,ymm1[20,21],zero,zero
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz:
; SKX: ## %bb.0:
; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[14,15],zero,zero,zmm0[10,11],zero,zero,zmm0[6,7],zero,zero,zmm0[2,3],zero,zero,zmm0[30,31],zero,zero,zmm0[26,27],zero,zero,zmm0[22,23],zero,zero,zmm0[18,19],zero,zero,zmm0[46,47],zero,zero,zmm0[42,43],zero,zero,zmm0[38,39],zero,zero,zmm0[34,35],zero,zero,zmm0[62,63],zero,zero,zmm0[58,59],zero,zero,zmm0[54,55],zero,zero,zmm0[52,53],zero,zero
; SKX-NEXT: retq
  %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 39, i32 0, i32 37, i32 0, i32 35, i32 0, i32 33, i32 0, i32 47, i32 0, i32 45, i32 0, i32 43, i32 0, i32 41, i32 0, i32 55, i32 0, i32 53, i32 0, i32 51, i32 0, i32 49, i32 0, i32 63, i32 0, i32 61, i32 0, i32 59, i32 0, i32 58, i32 0>
  ret <32 x i16> %shuffle
}