; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s

; Unmasked single-source byte permutation of a 128-bit vector.
; The expected output is one vpshufb on xmm0 with the same index list.
define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) {
; CHECK-LABEL: test_16xi8_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT: retq
  %perm = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  ret <16 x i8> %perm
}

; Merge-masked variant: lanes whose %mask byte equals zero take the permuted
; byte, all other lanes keep the corresponding byte of %vec2.
; The expected output builds %k1 with vptestnmb and shuffles into xmm1 under
; that mask before copying the result to xmm0.
define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_16xi8_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1
; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %perm = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  %lane_is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %merged = select <16 x i1> %lane_is_zero, <16 x i8> %perm, <16 x i8> %vec2
  ret <16 x i8> %merged
}

; Zero-masked variant: lanes whose %mask byte equals zero take the permuted
; byte, all other lanes are set to zero.
; The expected output is a single vpshufb with the {z} modifier under %k1.
define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_16xi8_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
; CHECK-NEXT: retq
  %perm = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
  %lane_is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %zeroed = select <16 x i1> %lane_is_zero, <16 x i8> %perm, <16 x i8> zeroinitializer
  ret <16 x i8> %zeroed
}
define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 37 ; CHECK-LABEL: test_masked_16xi8_perm_mask1: 38 ; CHECK: # %bb.0: 39 ; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1 40 ; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] 41 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 42 ; CHECK-NEXT: retq 43 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 44 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 45 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 46 ret <16 x i8> %res 47 } 48 49 define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) { 50 ; CHECK-LABEL: test_masked_z_16xi8_perm_mask1: 51 ; CHECK: # %bb.0: 52 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 53 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] 54 ; CHECK-NEXT: retq 55 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 56 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 57 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 58 ret <16 x i8> %res 59 } 60 define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 61 ; CHECK-LABEL: test_masked_16xi8_perm_mask2: 62 ; CHECK: # %bb.0: 63 ; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1 64 ; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] 65 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 66 ; CHECK-NEXT: retq 67 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 68 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 69 %res = select <16 x i1> 
%cmp, <16 x i8> %shuf, <16 x i8> %vec2 70 ret <16 x i8> %res 71 } 72 73 define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) { 74 ; CHECK-LABEL: test_masked_z_16xi8_perm_mask2: 75 ; CHECK: # %bb.0: 76 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 77 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] 78 ; CHECK-NEXT: retq 79 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 80 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 81 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 82 ret <16 x i8> %res 83 } 84 define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { 85 ; CHECK-LABEL: test_16xi8_perm_mask3: 86 ; CHECK: # %bb.0: 87 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] 88 ; CHECK-NEXT: retq 89 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 90 ret <16 x i8> %res 91 } 92 define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 93 ; CHECK-LABEL: test_masked_16xi8_perm_mask3: 94 ; CHECK: # %bb.0: 95 ; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1 96 ; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] 97 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 98 ; CHECK-NEXT: retq 99 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 100 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 101 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 102 ret <16 x i8> %res 103 } 104 105 define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) { 106 ; CHECK-LABEL: 
test_masked_z_16xi8_perm_mask3: 107 ; CHECK: # %bb.0: 108 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 109 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] 110 ; CHECK-NEXT: retq 111 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 112 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 113 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 114 ret <16 x i8> %res 115 } 116 define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) { 117 ; CHECK-LABEL: test_16xi8_perm_mem_mask0: 118 ; CHECK: # %bb.0: 119 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0 120 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] 121 ; CHECK-NEXT: retq 122 %vec = load <16 x i8>, <16 x i8>* %vp 123 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 124 ret <16 x i8> %res 125 } 126 define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 127 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask0: 128 ; CHECK: # %bb.0: 129 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2 130 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 131 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] 132 ; CHECK-NEXT: retq 133 %vec = load <16 x i8>, <16 x i8>* %vp 134 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 135 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 136 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 137 ret <16 x i8> %res 138 } 139 140 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %mask) { 141 ; CHECK-LABEL: 
test_masked_z_16xi8_perm_mem_mask0: 142 ; CHECK: # %bb.0: 143 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1 144 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 145 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] 146 ; CHECK-NEXT: retq 147 %vec = load <16 x i8>, <16 x i8>* %vp 148 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 149 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 150 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 151 ret <16 x i8> %res 152 } 153 154 define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 155 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask1: 156 ; CHECK: # %bb.0: 157 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2 158 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 159 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] 160 ; CHECK-NEXT: retq 161 %vec = load <16 x i8>, <16 x i8>* %vp 162 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 163 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 164 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 165 ret <16 x i8> %res 166 } 167 168 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %mask) { 169 ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask1: 170 ; CHECK: # %bb.0: 171 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1 172 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 173 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] 174 ; CHECK-NEXT: retq 175 %vec = load <16 x i8>, <16 x i8>* %vp 176 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 
1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 177 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 178 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 179 ret <16 x i8> %res 180 } 181 182 define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 183 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask2: 184 ; CHECK: # %bb.0: 185 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2 186 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 187 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] 188 ; CHECK-NEXT: retq 189 %vec = load <16 x i8>, <16 x i8>* %vp 190 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 191 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 192 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 193 ret <16 x i8> %res 194 } 195 196 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %mask) { 197 ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask2: 198 ; CHECK: # %bb.0: 199 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1 200 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 201 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] 202 ; CHECK-NEXT: retq 203 %vec = load <16 x i8>, <16 x i8>* %vp 204 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 205 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 206 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 207 ret <16 x i8> %res 208 } 209 210 define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) { 211 ; CHECK-LABEL: test_16xi8_perm_mem_mask3: 212 ; CHECK: # %bb.0: 213 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0 214 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] 215 ; CHECK-NEXT: retq 216 %vec = load <16 x i8>, <16 x i8>* %vp 217 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 218 ret <16 x i8> %res 219 } 220 define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 221 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask3: 222 ; CHECK: # %bb.0: 223 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2 224 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1 225 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] 226 ; CHECK-NEXT: retq 227 %vec = load <16 x i8>, <16 x i8>* %vp 228 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 229 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 230 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 231 ret <16 x i8> %res 232 } 233 234 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %mask) { 235 ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask3: 236 ; CHECK: # %bb.0: 237 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1 238 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1 239 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] 240 ; CHECK-NEXT: retq 241 %vec = load <16 x i8>, <16 x i8>* %vp 242 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 243 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 244 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 245 ret <16 x i8> %res 246 } 247 248 define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { 249 ; CHECK-LABEL: test_32xi8_perm_mask0: 250 ; CHECK: # %bb.0: 251 ; CHECK-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] 252 ; CHECK-NEXT: retq 253 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 254 ret <32 x i8> %res 255 } 256 define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 257 ; CHECK-LABEL: test_masked_32xi8_perm_mask0: 258 ; CHECK: # %bb.0: 259 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 260 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] 261 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 262 ; CHECK-NEXT: retq 263 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 264 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 265 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 266 ret <32 x i8> %res 267 } 268 269 define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) { 270 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask0: 271 ; CHECK: # %bb.0: 272 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 273 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] 274 ; CHECK-NEXT: retq 275 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, 
i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 276 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 277 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 278 ret <32 x i8> %res 279 } 280 define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 281 ; CHECK-LABEL: test_masked_32xi8_perm_mask1: 282 ; CHECK: # %bb.0: 283 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 284 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] 285 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 286 ; CHECK-NEXT: retq 287 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 288 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 289 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 290 ret <32 x i8> %res 291 } 292 293 define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) { 294 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask1: 295 ; CHECK: # %bb.0: 296 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 297 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] 298 ; CHECK-NEXT: retq 299 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 300 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 301 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 302 ret <32 x i8> %res 303 } 304 define <32 x i8> 
@test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 305 ; CHECK-LABEL: test_masked_32xi8_perm_mask2: 306 ; CHECK: # %bb.0: 307 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 308 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] 309 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 310 ; CHECK-NEXT: retq 311 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 312 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 313 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 314 ret <32 x i8> %res 315 } 316 317 define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) { 318 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask2: 319 ; CHECK: # %bb.0: 320 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 321 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] 322 ; CHECK-NEXT: retq 323 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 324 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 325 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 326 ret <32 x i8> %res 327 } 328 define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { 329 ; CHECK-LABEL: test_32xi8_perm_mask3: 330 ; CHECK: # %bb.0: 331 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] 332 ; 
CHECK-NEXT: retq 333 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 334 ret <32 x i8> %res 335 } 336 define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 337 ; CHECK-LABEL: test_masked_32xi8_perm_mask3: 338 ; CHECK: # %bb.0: 339 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1 340 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] 341 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 342 ; CHECK-NEXT: retq 343 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 344 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 345 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 346 ret <32 x i8> %res 347 } 348 349 define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) { 350 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask3: 351 ; CHECK: # %bb.0: 352 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 353 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] 354 ; CHECK-NEXT: retq 355 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 356 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 357 %res 
= select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 358 ret <32 x i8> %res 359 } 360 define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) { 361 ; CHECK-LABEL: test_32xi8_perm_mem_mask0: 362 ; CHECK: # %bb.0: 363 ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 364 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] 365 ; CHECK-NEXT: retq 366 %vec = load <32 x i8>, <32 x i8>* %vp 367 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 368 ret <32 x i8> %res 369 } 370 define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 371 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask0: 372 ; CHECK: # %bb.0: 373 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2 374 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 375 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] 376 ; CHECK-NEXT: retq 377 %vec = load <32 x i8>, <32 x i8>* %vp 378 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 379 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 380 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 381 ret <32 x i8> %res 382 } 383 384 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %mask) { 385 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask0: 386 ; CHECK: # %bb.0: 387 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 388 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 389 ; 
CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] 390 ; CHECK-NEXT: retq 391 %vec = load <32 x i8>, <32 x i8>* %vp 392 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 393 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 394 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 395 ret <32 x i8> %res 396 } 397 398 define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 399 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask1: 400 ; CHECK: # %bb.0: 401 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2 402 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 403 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] 404 ; CHECK-NEXT: retq 405 %vec = load <32 x i8>, <32 x i8>* %vp 406 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19> 407 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 408 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 409 ret <32 x i8> %res 410 } 411 412 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %mask) { 413 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask1: 414 ; CHECK: # %bb.0: 415 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 416 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 417 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = 
ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] 418 ; CHECK-NEXT: retq 419 %vec = load <32 x i8>, <32 x i8>* %vp 420 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19> 421 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 422 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 423 ret <32 x i8> %res 424 } 425 426 define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 427 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask2: 428 ; CHECK: # %bb.0: 429 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2 430 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 431 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] 432 ; CHECK-NEXT: retq 433 %vec = load <32 x i8>, <32 x i8>* %vp 434 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 435 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 436 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 437 ret <32 x i8> %res 438 } 439 440 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %mask) { 441 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask2: 442 ; CHECK: # %bb.0: 443 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 444 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 445 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] 446 ; CHECK-NEXT: retq 447 
%vec = load <32 x i8>, <32 x i8>* %vp 448 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 449 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 450 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 451 ret <32 x i8> %res 452 } 453 454 define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) { 455 ; CHECK-LABEL: test_32xi8_perm_mem_mask3: 456 ; CHECK: # %bb.0: 457 ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 458 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] 459 ; CHECK-NEXT: retq 460 %vec = load <32 x i8>, <32 x i8>* %vp 461 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 462 ret <32 x i8> %res 463 } 464 define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 465 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask3: 466 ; CHECK: # %bb.0: 467 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2 468 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1 469 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] 470 ; CHECK-NEXT: retq 471 %vec = load <32 x i8>, <32 x i8>* %vp 472 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 
22, i32 25, i32 29> 473 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 474 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 475 ret <32 x i8> %res 476 } 477 478 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %mask) { 479 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask3: 480 ; CHECK: # %bb.0: 481 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 482 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 483 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] 484 ; CHECK-NEXT: retq 485 %vec = load <32 x i8>, <32 x i8>* %vp 486 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 487 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 488 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 489 ret <32 x i8> %res 490 } 491 492 define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { 493 ; CHECK-LABEL: test_64xi8_perm_mask0: 494 ; CHECK: # %bb.0: 495 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] 496 ; CHECK-NEXT: retq 497 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, 
i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 498 ret <64 x i8> %res 499 } 500 define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 501 ; CHECK-LABEL: test_masked_64xi8_perm_mask0: 502 ; CHECK: # %bb.0: 503 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1 504 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] 505 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 506 ; CHECK-NEXT: retq 507 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 508 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 509 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 510 ret <64 x i8> %res 511 } 512 513 define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { 514 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask0: 515 ; CHECK: # %bb.0: 516 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 517 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] 518 ; CHECK-NEXT: retq 519 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 
23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 520 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 521 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 522 ret <64 x i8> %res 523 } 524 define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 525 ; CHECK-LABEL: test_masked_64xi8_perm_mask1: 526 ; CHECK: # %bb.0: 527 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1 528 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] 529 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 530 ; CHECK-NEXT: retq 531 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 532 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 533 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 534 ret <64 x i8> %res 535 } 536 537 define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { 538 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask1: 539 ; CHECK: # %bb.0: 540 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 541 ; 
CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] 542 ; CHECK-NEXT: retq 543 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 544 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 545 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 546 ret <64 x i8> %res 547 } 548 define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 549 ; CHECK-LABEL: test_masked_64xi8_perm_mask2: 550 ; CHECK: # %bb.0: 551 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1 552 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] 553 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 554 ; CHECK-NEXT: retq 555 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, 
i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 556 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 557 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 558 ret <64 x i8> %res 559 } 560 561 define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { 562 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask2: 563 ; CHECK: # %bb.0: 564 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 565 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] 566 ; CHECK-NEXT: retq 567 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 568 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 569 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 570 ret <64 x i8> %res 571 } 572 define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { 573 ; CHECK-LABEL: test_64xi8_perm_mask3: 574 ; CHECK: # %bb.0: 575 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] 576 ; CHECK-NEXT: retq 577 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 
25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 578 ret <64 x i8> %res 579 } 580 define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 581 ; CHECK-LABEL: test_masked_64xi8_perm_mask3: 582 ; CHECK: # %bb.0: 583 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1 584 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] 585 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 586 ; CHECK-NEXT: retq 587 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 588 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 589 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 590 ret <64 x i8> %res 591 } 592 593 define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) { 594 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask3: 595 ; CHECK: # %bb.0: 596 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 597 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] 598 ; CHECK-NEXT: retq 599 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 600 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 601 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 602 ret <64 x i8> %res 603 } 604 define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) { 605 ; CHECK-LABEL: test_64xi8_perm_mem_mask0: 606 ; CHECK: # %bb.0: 607 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 608 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] 609 ; CHECK-NEXT: retq 610 %vec = load <64 x i8>, <64 x i8>* %vp 611 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 612 ret <64 x 
i8> %res 613 } 614 define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 615 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask0: 616 ; CHECK: # %bb.0: 617 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 618 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 619 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] 620 ; CHECK-NEXT: retq 621 %vec = load <64 x i8>, <64 x i8>* %vp 622 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 623 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 624 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 625 ret <64 x i8> %res 626 } 627 628 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %mask) { 629 ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask0: 630 ; CHECK: # %bb.0: 631 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 632 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1 633 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] 634 ; CHECK-NEXT: retq 635 %vec = load <64 x i8>, <64 x i8>* %vp 636 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, 
i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 637 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 638 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 639 ret <64 x i8> %res 640 } 641 642 define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 643 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask1: 644 ; CHECK: # %bb.0: 645 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 646 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 647 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] 648 ; CHECK-NEXT: retq 649 %vec = load <64 x i8>, <64 x i8>* %vp 650 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49> 651 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 652 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 653 ret <64 x i8> %res 654 } 655 656 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %mask) { 657 
; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask1: 658 ; CHECK: # %bb.0: 659 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 660 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1 661 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] 662 ; CHECK-NEXT: retq 663 %vec = load <64 x i8>, <64 x i8>* %vp 664 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49> 665 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 666 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 667 ret <64 x i8> %res 668 } 669 670 define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 671 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask2: 672 ; CHECK: # %bb.0: 673 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 674 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 675 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] 676 ; CHECK-NEXT: retq 677 %vec = load <64 x i8>, <64 x i8>* %vp 678 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 
19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61> 679 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 680 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 681 ret <64 x i8> %res 682 } 683 684 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %mask) { 685 ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask2: 686 ; CHECK: # %bb.0: 687 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 688 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1 689 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] 690 ; CHECK-NEXT: retq 691 %vec = load <64 x i8>, <64 x i8>* %vp 692 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61> 693 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 694 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 695 ret <64 x i8> %res 696 } 697 698 define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) { 699 ; CHECK-LABEL: test_64xi8_perm_mem_mask3: 700 ; CHECK: # %bb.0: 701 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 702 ; CHECK-NEXT: 
vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] 703 ; CHECK-NEXT: retq 704 %vec = load <64 x i8>, <64 x i8>* %vp 705 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 706 ret <64 x i8> %res 707 } 708 define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 709 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask3: 710 ; CHECK: # %bb.0: 711 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 712 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1 713 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] 714 ; CHECK-NEXT: retq 715 %vec = load <64 x i8>, <64 x i8>* %vp 716 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, 
i32 48, i32 60, i32 50, i32 60> 717 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 718 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 719 ret <64 x i8> %res 720 } 721 722 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %mask) { 723 ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask3: 724 ; CHECK: # %bb.0: 725 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 726 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1 727 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] 728 ; CHECK-NEXT: retq 729 %vec = load <64 x i8>, <64 x i8>* %vp 730 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 731 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 732 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 733 ret <64 x i8> %res 734 } 735 736 define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) { 737 ; CHECK-LABEL: test_8xi16_perm_high_mask0: 738 ; CHECK: # %bb.0: 739 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] 740 ; CHECK-NEXT: retq 741 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 742 ret <8 x i16> %res 743 } 744 define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 745 ; CHECK-LABEL: 
test_masked_8xi16_perm_high_mask0: 746 ; CHECK: # %bb.0: 747 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 748 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] 749 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 750 ; CHECK-NEXT: retq 751 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 752 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 753 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 754 ret <8 x i16> %res 755 } 756 757 define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) { 758 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask0: 759 ; CHECK: # %bb.0: 760 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 761 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] 762 ; CHECK-NEXT: retq 763 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 764 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 765 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 766 ret <8 x i16> %res 767 } 768 define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 769 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask1: 770 ; CHECK: # %bb.0: 771 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 772 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] 773 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 774 ; CHECK-NEXT: retq 775 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 776 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 777 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 778 ret <8 x i16> %res 779 } 780 781 define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) { 782 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask1: 783 ; CHECK: # %bb.0: 784 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 785 ; CHECK-NEXT: vpshuflw {{.*#+}} 
xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] 786 ; CHECK-NEXT: retq 787 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 788 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 789 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 790 ret <8 x i16> %res 791 } 792 define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 793 ; CHECK-LABEL: test_masked_8xi16_perm_high_mask2: 794 ; CHECK: # %bb.0: 795 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 796 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] 797 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 798 ; CHECK-NEXT: retq 799 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 800 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 801 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 802 ret <8 x i16> %res 803 } 804 805 define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { 806 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask2: 807 ; CHECK: # %bb.0: 808 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 809 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] 810 ; CHECK-NEXT: retq 811 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 812 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 813 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 814 ret <8 x i16> %res 815 } 816 define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { 817 ; CHECK-LABEL: test_8xi16_perm_low_mask3: 818 ; CHECK: # %bb.0: 819 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] 820 ; CHECK-NEXT: retq 821 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 822 ret <8 x i16> %res 823 } 824 define <8 x i16> 
@test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 825 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask3: 826 ; CHECK: # %bb.0: 827 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 828 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] 829 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 830 ; CHECK-NEXT: retq 831 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 832 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 833 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 834 ret <8 x i16> %res 835 } 836 837 define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { 838 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask3: 839 ; CHECK: # %bb.0: 840 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 841 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] 842 ; CHECK-NEXT: retq 843 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 844 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 845 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 846 ret <8 x i16> %res 847 } 848 define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 849 ; CHECK-LABEL: test_masked_8xi16_perm_high_mask4: 850 ; CHECK: # %bb.0: 851 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 852 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] 853 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 854 ; CHECK-NEXT: retq 855 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 856 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 857 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 858 ret <8 x i16> %res 859 } 860 861 define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { 862 ; CHECK-LABEL: 
test_masked_z_8xi16_perm_high_mask4: 863 ; CHECK: # %bb.0: 864 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 865 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] 866 ; CHECK-NEXT: retq 867 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 868 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 869 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 870 ret <8 x i16> %res 871 } 872 define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 873 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask5: 874 ; CHECK: # %bb.0: 875 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 876 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] 877 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 878 ; CHECK-NEXT: retq 879 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 880 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 881 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 882 ret <8 x i16> %res 883 } 884 885 define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { 886 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask5: 887 ; CHECK: # %bb.0: 888 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 889 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] 890 ; CHECK-NEXT: retq 891 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 892 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 893 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 894 ret <8 x i16> %res 895 } 896 define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) { 897 ; CHECK-LABEL: test_8xi16_perm_high_mask6: 898 ; CHECK: # %bb.0: 899 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] 900 ; CHECK-NEXT: retq 901 %res = shufflevector <8 x i16> %vec, <8 x i16> 
undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 902 ret <8 x i16> %res 903 } 904 define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 905 ; CHECK-LABEL: test_masked_8xi16_perm_high_mask6: 906 ; CHECK: # %bb.0: 907 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 908 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] 909 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 910 ; CHECK-NEXT: retq 911 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 912 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 913 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 914 ret <8 x i16> %res 915 } 916 917 define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { 918 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask6: 919 ; CHECK: # %bb.0: 920 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 921 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] 922 ; CHECK-NEXT: retq 923 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 924 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 925 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 926 ret <8 x i16> %res 927 } 928 define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 929 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask7: 930 ; CHECK: # %bb.0: 931 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1 932 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] 933 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 934 ; CHECK-NEXT: retq 935 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 936 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 937 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 938 ret <8 x i16> %res 939 } 940 941 define <8 x i16> 
@test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { 942 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask7: 943 ; CHECK: # %bb.0: 944 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 945 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] 946 ; CHECK-NEXT: retq 947 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 948 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 949 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 950 ret <8 x i16> %res 951 } 952 define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { 953 ; CHECK-LABEL: test_8xi16_perm_high_mem_mask0: 954 ; CHECK: # %bb.0: 955 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] 956 ; CHECK-NEXT: retq 957 %vec = load <8 x i16>, <8 x i16>* %vp 958 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 959 ret <8 x i16> %res 960 } 961 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 962 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask0: 963 ; CHECK: # %bb.0: 964 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 965 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] 966 ; CHECK-NEXT: retq 967 %vec = load <8 x i16>, <8 x i16>* %vp 968 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 969 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 970 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 971 ret <8 x i16> %res 972 } 973 974 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { 975 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: 976 ; CHECK: # %bb.0: 977 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 978 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] 979 ; CHECK-NEXT: retq 980 %vec = load <8 x 
i16>, <8 x i16>* %vp 981 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 982 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 983 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 984 ret <8 x i16> %res 985 } 986 987 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 988 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask1: 989 ; CHECK: # %bb.0: 990 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 991 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] 992 ; CHECK-NEXT: retq 993 %vec = load <8 x i16>, <8 x i16>* %vp 994 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 995 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 996 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 997 ret <8 x i16> %res 998 } 999 1000 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { 1001 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: 1002 ; CHECK: # %bb.0: 1003 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1004 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] 1005 ; CHECK-NEXT: retq 1006 %vec = load <8 x i16>, <8 x i16>* %vp 1007 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1008 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1009 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1010 ret <8 x i16> %res 1011 } 1012 1013 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1014 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask2: 1015 ; CHECK: # %bb.0: 1016 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1017 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] 1018 ; CHECK-NEXT: retq 1019 %vec = load <8 x i16>, <8 x i16>* %vp 
1020 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 1021 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1022 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1023 ret <8 x i16> %res 1024 } 1025 1026 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { 1027 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: 1028 ; CHECK: # %bb.0: 1029 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1030 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] 1031 ; CHECK-NEXT: retq 1032 %vec = load <8 x i16>, <8 x i16>* %vp 1033 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 1034 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1035 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1036 ret <8 x i16> %res 1037 } 1038 1039 define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { 1040 ; CHECK-LABEL: test_8xi16_perm_low_mem_mask3: 1041 ; CHECK: # %bb.0: 1042 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] 1043 ; CHECK-NEXT: retq 1044 %vec = load <8 x i16>, <8 x i16>* %vp 1045 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 1046 ret <8 x i16> %res 1047 } 1048 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1049 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask3: 1050 ; CHECK: # %bb.0: 1051 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1052 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] 1053 ; CHECK-NEXT: retq 1054 %vec = load <8 x i16>, <8 x i16>* %vp 1055 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 1056 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1057 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> 
%vec2 1058 ret <8 x i16> %res 1059 } 1060 1061 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { 1062 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: 1063 ; CHECK: # %bb.0: 1064 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1065 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] 1066 ; CHECK-NEXT: retq 1067 %vec = load <8 x i16>, <8 x i16>* %vp 1068 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 1069 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1070 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1071 ret <8 x i16> %res 1072 } 1073 1074 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1075 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask4: 1076 ; CHECK: # %bb.0: 1077 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1078 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] 1079 ; CHECK-NEXT: retq 1080 %vec = load <8 x i16>, <8 x i16>* %vp 1081 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 1082 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1083 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1084 ret <8 x i16> %res 1085 } 1086 1087 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { 1088 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: 1089 ; CHECK: # %bb.0: 1090 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1091 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] 1092 ; CHECK-NEXT: retq 1093 %vec = load <8 x i16>, <8 x i16>* %vp 1094 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 1095 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1096 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1097 
ret <8 x i16> %res 1098 } 1099 1100 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1101 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask5: 1102 ; CHECK: # %bb.0: 1103 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1104 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] 1105 ; CHECK-NEXT: retq 1106 %vec = load <8 x i16>, <8 x i16>* %vp 1107 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1108 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1109 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1110 ret <8 x i16> %res 1111 } 1112 1113 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { 1114 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: 1115 ; CHECK: # %bb.0: 1116 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1117 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] 1118 ; CHECK-NEXT: retq 1119 %vec = load <8 x i16>, <8 x i16>* %vp 1120 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1121 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1122 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1123 ret <8 x i16> %res 1124 } 1125 1126 define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { 1127 ; CHECK-LABEL: test_8xi16_perm_high_mem_mask6: 1128 ; CHECK: # %bb.0: 1129 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] 1130 ; CHECK-NEXT: retq 1131 %vec = load <8 x i16>, <8 x i16>* %vp 1132 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 1133 ret <8 x i16> %res 1134 } 1135 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1136 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask6: 1137 ; CHECK: # %bb.0: 1138 ; 
CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1139 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] 1140 ; CHECK-NEXT: retq 1141 %vec = load <8 x i16>, <8 x i16>* %vp 1142 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 1143 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1144 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1145 ret <8 x i16> %res 1146 } 1147 1148 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { 1149 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: 1150 ; CHECK: # %bb.0: 1151 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 1152 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] 1153 ; CHECK-NEXT: retq 1154 %vec = load <8 x i16>, <8 x i16>* %vp 1155 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 1156 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1157 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1158 ret <8 x i16> %res 1159 } 1160 1161 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 1162 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask7: 1163 ; CHECK: # %bb.0: 1164 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 1165 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] 1166 ; CHECK-NEXT: retq 1167 %vec = load <8 x i16>, <8 x i16>* %vp 1168 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 1169 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1170 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 1171 ret <8 x i16> %res 1172 } 1173 1174 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { 1175 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: 1176 ; CHECK: # %bb.0: 1177 ; CHECK-NEXT: vptestnmw 
%xmm0, %xmm0, %k1 1178 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] 1179 ; CHECK-NEXT: retq 1180 %vec = load <8 x i16>, <8 x i16>* %vp 1181 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 1182 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 1183 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 1184 ret <8 x i16> %res 1185 } 1186 1187 define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) { 1188 ; CHECK-LABEL: test_16xi16_perm_high_mask0: 1189 ; CHECK: # %bb.0: 1190 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] 1191 ; CHECK-NEXT: retq 1192 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 1193 ret <16 x i16> %res 1194 } 1195 define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1196 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask0: 1197 ; CHECK: # %bb.0: 1198 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1199 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] 1200 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1201 ; CHECK-NEXT: retq 1202 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 1203 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1204 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1205 ret <16 x i16> %res 1206 } 1207 1208 define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { 1209 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask0: 1210 ; CHECK: # %bb.0: 1211 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1212 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = 
ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] 1213 ; CHECK-NEXT: retq 1214 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 1215 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1216 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1217 ret <16 x i16> %res 1218 } 1219 define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1220 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask1: 1221 ; CHECK: # %bb.0: 1222 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1223 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] 1224 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1225 ; CHECK-NEXT: retq 1226 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1227 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1228 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1229 ret <16 x i16> %res 1230 } 1231 1232 define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { 1233 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask1: 1234 ; CHECK: # %bb.0: 1235 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1236 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] 1237 ; CHECK-NEXT: retq 1238 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1239 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1240 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1241 ret <16 x i16> %res 1242 } 1243 define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 
1244 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask2: 1245 ; CHECK: # %bb.0: 1246 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1247 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] 1248 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1249 ; CHECK-NEXT: retq 1250 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 1251 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1252 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1253 ret <16 x i16> %res 1254 } 1255 1256 define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { 1257 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask2: 1258 ; CHECK: # %bb.0: 1259 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1260 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] 1261 ; CHECK-NEXT: retq 1262 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 1263 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1264 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1265 ret <16 x i16> %res 1266 } 1267 define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { 1268 ; CHECK-LABEL: test_16xi16_perm_low_mask3: 1269 ; CHECK: # %bb.0: 1270 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] 1271 ; CHECK-NEXT: retq 1272 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1273 ret <16 x i16> %res 1274 } 1275 define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1276 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask3: 1277 ; 
CHECK: # %bb.0: 1278 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1279 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] 1280 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1281 ; CHECK-NEXT: retq 1282 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1283 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1284 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1285 ret <16 x i16> %res 1286 } 1287 1288 define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { 1289 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask3: 1290 ; CHECK: # %bb.0: 1291 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1292 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] 1293 ; CHECK-NEXT: retq 1294 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1295 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1296 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1297 ret <16 x i16> %res 1298 } 1299 define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1300 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask4: 1301 ; CHECK: # %bb.0: 1302 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1303 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] 1304 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1305 ; CHECK-NEXT: retq 1306 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 1307 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1308 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 
1309 ret <16 x i16> %res 1310 } 1311 1312 define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { 1313 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask4: 1314 ; CHECK: # %bb.0: 1315 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1316 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] 1317 ; CHECK-NEXT: retq 1318 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 1319 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1320 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1321 ret <16 x i16> %res 1322 } 1323 define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1324 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask5: 1325 ; CHECK: # %bb.0: 1326 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1327 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] 1328 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1329 ; CHECK-NEXT: retq 1330 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1331 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1332 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1333 ret <16 x i16> %res 1334 } 1335 1336 define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { 1337 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask5: 1338 ; CHECK: # %bb.0: 1339 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1340 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] 1341 ; CHECK-NEXT: retq 1342 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 
8, i32 12, i32 13, i32 14, i32 15> 1343 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1344 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1345 ret <16 x i16> %res 1346 } 1347 define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { 1348 ; CHECK-LABEL: test_16xi16_perm_high_mask6: 1349 ; CHECK: # %bb.0: 1350 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] 1351 ; CHECK-NEXT: retq 1352 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 1353 ret <16 x i16> %res 1354 } 1355 define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1356 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask6: 1357 ; CHECK: # %bb.0: 1358 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1359 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] 1360 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1361 ; CHECK-NEXT: retq 1362 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 1363 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1364 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1365 ret <16 x i16> %res 1366 } 1367 1368 define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { 1369 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask6: 1370 ; CHECK: # %bb.0: 1371 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1372 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] 1373 ; CHECK-NEXT: retq 1374 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 1375 %cmp = icmp eq <16 x 
i16> %mask, zeroinitializer 1376 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1377 ret <16 x i16> %res 1378 } 1379 define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 1380 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask7: 1381 ; CHECK: # %bb.0: 1382 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1 1383 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] 1384 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 1385 ; CHECK-NEXT: retq 1386 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 1387 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1388 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1389 ret <16 x i16> %res 1390 } 1391 1392 define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { 1393 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask7: 1394 ; CHECK: # %bb.0: 1395 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1396 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] 1397 ; CHECK-NEXT: retq 1398 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 1399 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1400 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1401 ret <16 x i16> %res 1402 } 1403 define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { 1404 ; CHECK-LABEL: test_16xi16_perm_high_mem_mask0: 1405 ; CHECK: # %bb.0: 1406 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] 1407 ; CHECK-NEXT: retq 1408 %vec = load <16 x i16>, <16 x i16>* %vp 1409 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, 
i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 1410 ret <16 x i16> %res 1411 } 1412 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1413 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask0: 1414 ; CHECK: # %bb.0: 1415 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1416 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] 1417 ; CHECK-NEXT: retq 1418 %vec = load <16 x i16>, <16 x i16>* %vp 1419 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 1420 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1421 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1422 ret <16 x i16> %res 1423 } 1424 1425 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { 1426 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: 1427 ; CHECK: # %bb.0: 1428 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1429 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] 1430 ; CHECK-NEXT: retq 1431 %vec = load <16 x i16>, <16 x i16>* %vp 1432 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 1433 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1434 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1435 ret <16 x i16> %res 1436 } 1437 1438 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1439 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask1: 1440 ; CHECK: # %bb.0: 1441 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1442 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] 1443 
; CHECK-NEXT: retq 1444 %vec = load <16 x i16>, <16 x i16>* %vp 1445 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1446 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1447 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1448 ret <16 x i16> %res 1449 } 1450 1451 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { 1452 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: 1453 ; CHECK: # %bb.0: 1454 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1455 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] 1456 ; CHECK-NEXT: retq 1457 %vec = load <16 x i16>, <16 x i16>* %vp 1458 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1459 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1460 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1461 ret <16 x i16> %res 1462 } 1463 1464 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1465 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask2: 1466 ; CHECK: # %bb.0: 1467 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1468 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] 1469 ; CHECK-NEXT: retq 1470 %vec = load <16 x i16>, <16 x i16>* %vp 1471 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 1472 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1473 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1474 ret <16 x i16> %res 1475 } 1476 1477 define <16 x i16> 
@test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { 1478 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: 1479 ; CHECK: # %bb.0: 1480 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1481 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] 1482 ; CHECK-NEXT: retq 1483 %vec = load <16 x i16>, <16 x i16>* %vp 1484 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 1485 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1486 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1487 ret <16 x i16> %res 1488 } 1489 1490 define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { 1491 ; CHECK-LABEL: test_16xi16_perm_low_mem_mask3: 1492 ; CHECK: # %bb.0: 1493 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] 1494 ; CHECK-NEXT: retq 1495 %vec = load <16 x i16>, <16 x i16>* %vp 1496 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1497 ret <16 x i16> %res 1498 } 1499 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1500 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask3: 1501 ; CHECK: # %bb.0: 1502 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1503 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] 1504 ; CHECK-NEXT: retq 1505 %vec = load <16 x i16>, <16 x i16>* %vp 1506 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1507 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1508 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 
1509 ret <16 x i16> %res 1510 } 1511 1512 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { 1513 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: 1514 ; CHECK: # %bb.0: 1515 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1516 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] 1517 ; CHECK-NEXT: retq 1518 %vec = load <16 x i16>, <16 x i16>* %vp 1519 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 1520 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1521 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1522 ret <16 x i16> %res 1523 } 1524 1525 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1526 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask4: 1527 ; CHECK: # %bb.0: 1528 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1529 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] 1530 ; CHECK-NEXT: retq 1531 %vec = load <16 x i16>, <16 x i16>* %vp 1532 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 1533 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1534 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1535 ret <16 x i16> %res 1536 } 1537 1538 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) { 1539 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: 1540 ; CHECK: # %bb.0: 1541 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1542 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] 1543 ; CHECK-NEXT: retq 1544 %vec = load <16 x i16>, <16 x i16>* %vp 1545 %shuf = shufflevector <16 x 
i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 1546 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1547 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1548 ret <16 x i16> %res 1549 } 1550 1551 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1552 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask5: 1553 ; CHECK: # %bb.0: 1554 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1555 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] 1556 ; CHECK-NEXT: retq 1557 %vec = load <16 x i16>, <16 x i16>* %vp 1558 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1559 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1560 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1561 ret <16 x i16> %res 1562 } 1563 1564 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) { 1565 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: 1566 ; CHECK: # %bb.0: 1567 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1568 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] 1569 ; CHECK-NEXT: retq 1570 %vec = load <16 x i16>, <16 x i16>* %vp 1571 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1572 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1573 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1574 ret <16 x i16> %res 1575 } 1576 1577 define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { 1578 ; CHECK-LABEL: test_16xi16_perm_high_mem_mask6: 1579 ; CHECK: # 
%bb.0: 1580 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] 1581 ; CHECK-NEXT: retq 1582 %vec = load <16 x i16>, <16 x i16>* %vp 1583 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 1584 ret <16 x i16> %res 1585 } 1586 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 1587 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask6: 1588 ; CHECK: # %bb.0: 1589 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1590 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] 1591 ; CHECK-NEXT: retq 1592 %vec = load <16 x i16>, <16 x i16>* %vp 1593 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 1594 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1595 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1596 ret <16 x i16> %res 1597 } 1598 1599 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) { 1600 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: 1601 ; CHECK: # %bb.0: 1602 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1603 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] 1604 ; CHECK-NEXT: retq 1605 %vec = load <16 x i16>, <16 x i16>* %vp 1606 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 1607 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1608 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1609 ret <16 x i16> %res 1610 } 1611 1612 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x 
i16> %vec2, <16 x i16> %mask) { 1613 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask7: 1614 ; CHECK: # %bb.0: 1615 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1 1616 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] 1617 ; CHECK-NEXT: retq 1618 %vec = load <16 x i16>, <16 x i16>* %vp 1619 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1620 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1621 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 1622 ret <16 x i16> %res 1623 } 1624 1625 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) { 1626 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: 1627 ; CHECK: # %bb.0: 1628 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1 1629 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] 1630 ; CHECK-NEXT: retq 1631 %vec = load <16 x i16>, <16 x i16>* %vp 1632 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 1633 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 1634 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 1635 ret <16 x i16> %res 1636 } 1637 1638 define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { 1639 ; CHECK-LABEL: test_32xi16_perm_high_mask0: 1640 ; CHECK: # %bb.0: 1641 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] 1642 ; CHECK-NEXT: retq 1643 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, 
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 1644 ret <32 x i16> %res 1645 } 1646 define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1647 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask0: 1648 ; CHECK: # %bb.0: 1649 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1650 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] 1651 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1652 ; CHECK-NEXT: retq 1653 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 1654 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1655 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1656 ret <32 x i16> %res 1657 } 1658 1659 define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { 1660 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask0: 1661 ; CHECK: # %bb.0: 1662 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1663 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] 1664 ; CHECK-NEXT: retq 1665 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 1666 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1667 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1668 ret <32 x i16> %res 1669 } 1670 define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, 
<32 x i16> %mask) { 1671 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask1: 1672 ; CHECK: # %bb.0: 1673 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1674 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] 1675 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1676 ; CHECK-NEXT: retq 1677 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 1678 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1679 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1680 ret <32 x i16> %res 1681 } 1682 1683 define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { 1684 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask1: 1685 ; CHECK: # %bb.0: 1686 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1687 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] 1688 ; CHECK-NEXT: retq 1689 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 1690 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1691 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1692 ret <32 x i16> %res 1693 } 1694 define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1695 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask2: 1696 ; CHECK: # %bb.0: 1697 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1698 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = 
zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] 1699 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1700 ; CHECK-NEXT: retq 1701 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 1702 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1703 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1704 ret <32 x i16> %res 1705 } 1706 1707 define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { 1708 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask2: 1709 ; CHECK: # %bb.0: 1710 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1711 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] 1712 ; CHECK-NEXT: retq 1713 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 1714 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1715 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1716 ret <32 x i16> %res 1717 } 1718 define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { 1719 ; CHECK-LABEL: test_32xi16_perm_low_mask3: 1720 ; CHECK: # %bb.0: 1721 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] 1722 ; CHECK-NEXT: retq 1723 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, 
i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 1724 ret <32 x i16> %res 1725 } 1726 define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1727 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask3: 1728 ; CHECK: # %bb.0: 1729 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1730 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] 1731 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1732 ; CHECK-NEXT: retq 1733 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 1734 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1735 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1736 ret <32 x i16> %res 1737 } 1738 1739 define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { 1740 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask3: 1741 ; CHECK: # %bb.0: 1742 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1743 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] 1744 ; CHECK-NEXT: retq 1745 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 1746 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1747 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1748 ret <32 x i16> %res 1749 } 1750 define 
<32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1751 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask4: 1752 ; CHECK: # %bb.0: 1753 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1754 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] 1755 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1756 ; CHECK-NEXT: retq 1757 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 1758 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1759 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1760 ret <32 x i16> %res 1761 } 1762 1763 define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { 1764 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask4: 1765 ; CHECK: # %bb.0: 1766 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1767 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] 1768 ; CHECK-NEXT: retq 1769 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 1770 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1771 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1772 ret <32 x i16> %res 1773 } 1774 define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1775 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask5: 1776 ; CHECK: # %bb.0: 1777 ; 
CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1778 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] 1779 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1780 ; CHECK-NEXT: retq 1781 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 1782 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1783 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1784 ret <32 x i16> %res 1785 } 1786 1787 define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { 1788 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask5: 1789 ; CHECK: # %bb.0: 1790 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1791 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] 1792 ; CHECK-NEXT: retq 1793 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 1794 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1795 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1796 ret <32 x i16> %res 1797 } 1798 define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { 1799 ; CHECK-LABEL: test_32xi16_perm_high_mask6: 1800 ; CHECK: # %bb.0: 1801 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] 1802 ; CHECK-NEXT: retq 1803 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 
2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 1804 ret <32 x i16> %res 1805 } 1806 define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1807 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask6: 1808 ; CHECK: # %bb.0: 1809 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1810 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] 1811 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1812 ; CHECK-NEXT: retq 1813 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 1814 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1815 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1816 ret <32 x i16> %res 1817 } 1818 1819 define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { 1820 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask6: 1821 ; CHECK: # %bb.0: 1822 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1823 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] 1824 ; CHECK-NEXT: retq 1825 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 1826 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1827 %res = select <32 x i1> %cmp, <32 x 
i16> %shuf, <32 x i16> zeroinitializer 1828 ret <32 x i16> %res 1829 } 1830 define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 1831 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask7: 1832 ; CHECK: # %bb.0: 1833 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1 1834 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] 1835 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 1836 ; CHECK-NEXT: retq 1837 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 1838 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1839 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1840 ret <32 x i16> %res 1841 } 1842 1843 define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) { 1844 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask7: 1845 ; CHECK: # %bb.0: 1846 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1847 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] 1848 ; CHECK-NEXT: retq 1849 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 1850 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1851 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1852 ret <32 x i16> %res 1853 } 1854 define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { 1855 ; CHECK-LABEL: 
test_32xi16_perm_high_mem_mask0: 1856 ; CHECK: # %bb.0: 1857 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] 1858 ; CHECK-NEXT: retq 1859 %vec = load <32 x i16>, <32 x i16>* %vp 1860 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 1861 ret <32 x i16> %res 1862 } 1863 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1864 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask0: 1865 ; CHECK: # %bb.0: 1866 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1867 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] 1868 ; CHECK-NEXT: retq 1869 %vec = load <32 x i16>, <32 x i16>* %vp 1870 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 1871 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1872 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1873 ret <32 x i16> %res 1874 } 1875 1876 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { 1877 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: 1878 ; CHECK: # %bb.0: 1879 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1880 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] 1881 ; CHECK-NEXT: retq 1882 %vec = load <32 x i16>, <32 x i16>* %vp 
1883 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30> 1884 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1885 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1886 ret <32 x i16> %res 1887 } 1888 1889 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1890 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask1: 1891 ; CHECK: # %bb.0: 1892 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1893 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] 1894 ; CHECK-NEXT: retq 1895 %vec = load <32 x i16>, <32 x i16>* %vp 1896 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31> 1897 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1898 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1899 ret <32 x i16> %res 1900 } 1901 1902 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { 1903 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: 1904 ; CHECK: # %bb.0: 1905 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1906 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] 1907 ; CHECK-NEXT: retq 1908 %vec = load <32 x i16>, <32 x i16>* %vp 1909 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31> 1910 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1911 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1912 ret <32 x i16> %res 1913 } 1914 1915 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1916 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask2: 1917 ; CHECK: # %bb.0: 1918 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1919 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] 1920 ; CHECK-NEXT: retq 1921 %vec = load <32 x i16>, <32 x i16>* %vp 1922 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28> 1923 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1924 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1925 ret <32 x i16> %res 1926 } 1927 1928 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { 1929 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: 1930 ; CHECK: # %bb.0: 1931 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1932 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] 1933 ; CHECK-NEXT: retq 1934 %vec = load <32 x i16>, <32 x i16>* %vp 1935 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 
22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28> 1936 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1937 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1938 ret <32 x i16> %res 1939 } 1940 1941 define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { 1942 ; CHECK-LABEL: test_32xi16_perm_low_mem_mask3: 1943 ; CHECK: # %bb.0: 1944 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] 1945 ; CHECK-NEXT: retq 1946 %vec = load <32 x i16>, <32 x i16>* %vp 1947 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 1948 ret <32 x i16> %res 1949 } 1950 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1951 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask3: 1952 ; CHECK: # %bb.0: 1953 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1954 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] 1955 ; CHECK-NEXT: retq 1956 %vec = load <32 x i16>, <32 x i16>* %vp 1957 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 1958 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1959 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1960 ret <32 x i16> %res 1961 } 1962 1963 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { 1964 ; 
CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: 1965 ; CHECK: # %bb.0: 1966 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 1967 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] 1968 ; CHECK-NEXT: retq 1969 %vec = load <32 x i16>, <32 x i16>* %vp 1970 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31> 1971 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1972 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1973 ret <32 x i16> %res 1974 } 1975 1976 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 1977 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask4: 1978 ; CHECK: # %bb.0: 1979 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 1980 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] 1981 ; CHECK-NEXT: retq 1982 %vec = load <32 x i16>, <32 x i16>* %vp 1983 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29> 1984 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1985 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 1986 ret <32 x i16> %res 1987 } 1988 1989 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) { 1990 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: 1991 ; CHECK: # %bb.0: 1992 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, 
%k1 1993 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] 1994 ; CHECK-NEXT: retq 1995 %vec = load <32 x i16>, <32 x i16>* %vp 1996 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29> 1997 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 1998 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 1999 ret <32 x i16> %res 2000 } 2001 2002 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 2003 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask5: 2004 ; CHECK: # %bb.0: 2005 ; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] 2006 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 2007 ; CHECK-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} 2008 ; CHECK-NEXT: retq 2009 %vec = load <32 x i16>, <32 x i16>* %vp 2010 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31> 2011 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2012 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 2013 ret <32 x i16> %res 2014 } 2015 2016 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) { 2017 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: 2018 ; CHECK: # %bb.0: 2019 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] 2020 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 2021 ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 
{%k1} {z} 2022 ; CHECK-NEXT: retq 2023 %vec = load <32 x i16>, <32 x i16>* %vp 2024 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31> 2025 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2026 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 2027 ret <32 x i16> %res 2028 } 2029 2030 define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { 2031 ; CHECK-LABEL: test_32xi16_perm_high_mem_mask6: 2032 ; CHECK: # %bb.0: 2033 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] 2034 ; CHECK-NEXT: retq 2035 %vec = load <32 x i16>, <32 x i16>* %vp 2036 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 2037 ret <32 x i16> %res 2038 } 2039 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 2040 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask6: 2041 ; CHECK: # %bb.0: 2042 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 2043 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] 2044 ; CHECK-NEXT: retq 2045 %vec = load <32 x i16>, <32 x i16>* %vp 2046 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 
25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 2047 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2048 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 2049 ret <32 x i16> %res 2050 } 2051 2052 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) { 2053 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: 2054 ; CHECK: # %bb.0: 2055 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 2056 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] 2057 ; CHECK-NEXT: retq 2058 %vec = load <32 x i16>, <32 x i16>* %vp 2059 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30> 2060 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2061 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 2062 ret <32 x i16> %res 2063 } 2064 2065 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 2066 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask7: 2067 ; CHECK: # %bb.0: 2068 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1 2069 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] 2070 ; CHECK-NEXT: retq 2071 %vec = load <32 x i16>, <32 x i16>* %vp 2072 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 2073 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2074 %res = 
select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 2075 ret <32 x i16> %res 2076 } 2077 2078 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) { 2079 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: 2080 ; CHECK: # %bb.0: 2081 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1 2082 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] 2083 ; CHECK-NEXT: retq 2084 %vec = load <32 x i16>, <32 x i16>* %vp 2085 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31> 2086 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 2087 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 2088 ret <32 x i16> %res 2089 } 2090 2091 define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) { 2092 ; CHECK-LABEL: test_4xi32_perm_mask0: 2093 ; CHECK: # %bb.0: 2094 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] 2095 ; CHECK-NEXT: retq 2096 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 2097 ret <4 x i32> %res 2098 } 2099 define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2100 ; CHECK-LABEL: test_masked_4xi32_perm_mask0: 2101 ; CHECK: # %bb.0: 2102 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2103 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] 2104 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2105 ; CHECK-NEXT: retq 2106 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 2107 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2108 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2109 ret <4 x i32> %res 2110 } 2111 2112 define <4 x 
i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) { 2113 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask0: 2114 ; CHECK: # %bb.0: 2115 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2116 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] 2117 ; CHECK-NEXT: retq 2118 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0> 2119 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2120 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2121 ret <4 x i32> %res 2122 } 2123 define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2124 ; CHECK-LABEL: test_masked_4xi32_perm_mask1: 2125 ; CHECK: # %bb.0: 2126 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2127 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] 2128 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2129 ; CHECK-NEXT: retq 2130 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0> 2131 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2132 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2133 ret <4 x i32> %res 2134 } 2135 2136 define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) { 2137 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask1: 2138 ; CHECK: # %bb.0: 2139 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2140 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] 2141 ; CHECK-NEXT: retq 2142 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0> 2143 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2144 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2145 ret <4 x i32> %res 2146 } 2147 define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2148 ; CHECK-LABEL: test_masked_4xi32_perm_mask2: 2149 ; CHECK: # %bb.0: 2150 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2151 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] 
2152 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2153 ; CHECK-NEXT: retq 2154 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0> 2155 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2156 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2157 ret <4 x i32> %res 2158 } 2159 2160 define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) { 2161 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask2: 2162 ; CHECK: # %bb.0: 2163 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2164 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] 2165 ; CHECK-NEXT: retq 2166 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0> 2167 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2168 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2169 ret <4 x i32> %res 2170 } 2171 define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) { 2172 ; CHECK-LABEL: test_4xi32_perm_mask3: 2173 ; CHECK: # %bb.0: 2174 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] 2175 ; CHECK-NEXT: retq 2176 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 2177 ret <4 x i32> %res 2178 } 2179 define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { 2180 ; CHECK-LABEL: test_masked_4xi32_perm_mask3: 2181 ; CHECK: # %bb.0: 2182 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2183 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] 2184 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 2185 ; CHECK-NEXT: retq 2186 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 2187 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2188 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2189 ret <4 x i32> %res 2190 } 2191 2192 define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) { 2193 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask3: 2194 ; CHECK: # %bb.0: 2195 ; 
CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2196 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] 2197 ; CHECK-NEXT: retq 2198 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 2199 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2200 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2201 ret <4 x i32> %res 2202 } 2203 define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) { 2204 ; CHECK-LABEL: test_4xi32_perm_mem_mask0: 2205 ; CHECK: # %bb.0: 2206 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] 2207 ; CHECK-NEXT: retq 2208 %vec = load <4 x i32>, <4 x i32>* %vp 2209 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 2210 ret <4 x i32> %res 2211 } 2212 define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2213 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask0: 2214 ; CHECK: # %bb.0: 2215 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2216 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] 2217 ; CHECK-NEXT: retq 2218 %vec = load <4 x i32>, <4 x i32>* %vp 2219 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 2220 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2221 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2222 ret <4 x i32> %res 2223 } 2224 2225 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) { 2226 ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask0: 2227 ; CHECK: # %bb.0: 2228 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2229 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] 2230 ; CHECK-NEXT: retq 2231 %vec = load <4 x i32>, <4 x i32>* %vp 2232 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> 2233 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2234 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2235 ret <4 x i32> 
%res 2236 } 2237 2238 define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2239 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask1: 2240 ; CHECK: # %bb.0: 2241 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2242 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] 2243 ; CHECK-NEXT: retq 2244 %vec = load <4 x i32>, <4 x i32>* %vp 2245 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1> 2246 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2247 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2248 ret <4 x i32> %res 2249 } 2250 2251 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) { 2252 ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask1: 2253 ; CHECK: # %bb.0: 2254 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2255 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] 2256 ; CHECK-NEXT: retq 2257 %vec = load <4 x i32>, <4 x i32>* %vp 2258 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1> 2259 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2260 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2261 ret <4 x i32> %res 2262 } 2263 2264 define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2265 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask2: 2266 ; CHECK: # %bb.0: 2267 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2268 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] 2269 ; CHECK-NEXT: retq 2270 %vec = load <4 x i32>, <4 x i32>* %vp 2271 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 2272 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2273 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2274 ret <4 x i32> %res 2275 } 2276 2277 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { 2278 ; CHECK-LABEL: 
test_masked_z_4xi32_perm_mem_mask2: 2279 ; CHECK: # %bb.0: 2280 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2281 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] 2282 ; CHECK-NEXT: retq 2283 %vec = load <4 x i32>, <4 x i32>* %vp 2284 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 2285 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2286 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2287 ret <4 x i32> %res 2288 } 2289 2290 define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { 2291 ; CHECK-LABEL: test_4xi32_perm_mem_mask3: 2292 ; CHECK: # %bb.0: 2293 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] 2294 ; CHECK-NEXT: retq 2295 %vec = load <4 x i32>, <4 x i32>* %vp 2296 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 2297 ret <4 x i32> %res 2298 } 2299 define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 2300 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask3: 2301 ; CHECK: # %bb.0: 2302 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2303 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] 2304 ; CHECK-NEXT: retq 2305 %vec = load <4 x i32>, <4 x i32>* %vp 2306 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 2307 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 2308 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 2309 ret <4 x i32> %res 2310 } 2311 2312 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { 2313 ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask3: 2314 ; CHECK: # %bb.0: 2315 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2316 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] 2317 ; CHECK-NEXT: retq 2318 %vec = load <4 x i32>, <4 x i32>* %vp 2319 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 2320 %cmp = icmp eq <4 x i32> %mask, 
zeroinitializer 2321 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 2322 ret <4 x i32> %res 2323 } 2324 2325 define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) { 2326 ; CHECK-LABEL: test_8xi32_perm_mask0: 2327 ; CHECK: # %bb.0: 2328 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] 2329 ; CHECK-NEXT: retq 2330 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 2331 ret <8 x i32> %res 2332 } 2333 define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2334 ; CHECK-LABEL: test_masked_8xi32_perm_mask0: 2335 ; CHECK: # %bb.0: 2336 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2337 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] 2338 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2339 ; CHECK-NEXT: retq 2340 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 2341 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2342 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2343 ret <8 x i32> %res 2344 } 2345 2346 define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { 2347 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask0: 2348 ; CHECK: # %bb.0: 2349 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2350 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] 2351 ; CHECK-NEXT: retq 2352 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 2353 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2354 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2355 ret <8 x i32> %res 2356 } 2357 define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2358 ; CHECK-LABEL: test_masked_8xi32_perm_mask1: 2359 ; CHECK: # %bb.0: 2360 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2361 ; CHECK-NEXT: vpshufd 
{{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] 2362 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2363 ; CHECK-NEXT: retq 2364 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 2365 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2366 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2367 ret <8 x i32> %res 2368 } 2369 2370 define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { 2371 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask1: 2372 ; CHECK: # %bb.0: 2373 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2374 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] 2375 ; CHECK-NEXT: retq 2376 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 2377 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2378 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2379 ret <8 x i32> %res 2380 } 2381 define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2382 ; CHECK-LABEL: test_masked_8xi32_perm_mask2: 2383 ; CHECK: # %bb.0: 2384 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2385 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] 2386 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2387 ; CHECK-NEXT: retq 2388 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7> 2389 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2390 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2391 ret <8 x i32> %res 2392 } 2393 2394 define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { 2395 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask2: 2396 ; CHECK: # %bb.0: 2397 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2398 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] 2399 ; CHECK-NEXT: retq 2400 %shuf = shufflevector <8 x i32> %vec, <8 x i32> 
undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7> 2401 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2402 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2403 ret <8 x i32> %res 2404 } 2405 define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { 2406 ; CHECK-LABEL: test_8xi32_perm_mask3: 2407 ; CHECK: # %bb.0: 2408 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] 2409 ; CHECK-NEXT: retq 2410 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 2411 ret <8 x i32> %res 2412 } 2413 define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 2414 ; CHECK-LABEL: test_masked_8xi32_perm_mask3: 2415 ; CHECK: # %bb.0: 2416 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2417 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] 2418 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 2419 ; CHECK-NEXT: retq 2420 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 2421 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2422 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2423 ret <8 x i32> %res 2424 } 2425 2426 define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { 2427 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask3: 2428 ; CHECK: # %bb.0: 2429 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2430 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] 2431 ; CHECK-NEXT: retq 2432 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 2433 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2434 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2435 ret <8 x i32> %res 2436 } 2437 define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { 2438 ; CHECK-LABEL: test_8xi32_perm_mem_mask0: 2439 ; CHECK: # %bb.0: 2440 ; 
CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] 2441 ; CHECK-NEXT: retq 2442 %vec = load <8 x i32>, <8 x i32>* %vp 2443 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 2444 ret <8 x i32> %res 2445 } 2446 define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2447 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask0: 2448 ; CHECK: # %bb.0: 2449 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2450 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] 2451 ; CHECK-NEXT: retq 2452 %vec = load <8 x i32>, <8 x i32>* %vp 2453 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 2454 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2455 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2456 ret <8 x i32> %res 2457 } 2458 2459 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { 2460 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask0: 2461 ; CHECK: # %bb.0: 2462 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2463 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] 2464 ; CHECK-NEXT: retq 2465 %vec = load <8 x i32>, <8 x i32>* %vp 2466 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 2467 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2468 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2469 ret <8 x i32> %res 2470 } 2471 2472 define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2473 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask1: 2474 ; CHECK: # %bb.0: 2475 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2476 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] 2477 ; CHECK-NEXT: retq 2478 %vec = load <8 x i32>, <8 x i32>* %vp 2479 %shuf = shufflevector <8 x i32> %vec, 
<8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 2480 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2481 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2482 ret <8 x i32> %res 2483 } 2484 2485 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { 2486 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask1: 2487 ; CHECK: # %bb.0: 2488 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2489 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] 2490 ; CHECK-NEXT: retq 2491 %vec = load <8 x i32>, <8 x i32>* %vp 2492 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 2493 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2494 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2495 ret <8 x i32> %res 2496 } 2497 2498 define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2499 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask2: 2500 ; CHECK: # %bb.0: 2501 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2502 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] 2503 ; CHECK-NEXT: retq 2504 %vec = load <8 x i32>, <8 x i32>* %vp 2505 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5> 2506 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2507 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2508 ret <8 x i32> %res 2509 } 2510 2511 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { 2512 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask2: 2513 ; CHECK: # %bb.0: 2514 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2515 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] 2516 ; CHECK-NEXT: retq 2517 %vec = load <8 x i32>, <8 x i32>* %vp 2518 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, 
i32 1, i32 7, i32 6, i32 7, i32 5> 2519 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2520 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2521 ret <8 x i32> %res 2522 } 2523 2524 define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { 2525 ; CHECK-LABEL: test_8xi32_perm_mem_mask3: 2526 ; CHECK: # %bb.0: 2527 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] 2528 ; CHECK-NEXT: retq 2529 %vec = load <8 x i32>, <8 x i32>* %vp 2530 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 2531 ret <8 x i32> %res 2532 } 2533 define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 2534 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask3: 2535 ; CHECK: # %bb.0: 2536 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2537 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] 2538 ; CHECK-NEXT: retq 2539 %vec = load <8 x i32>, <8 x i32>* %vp 2540 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 2541 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2542 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 2543 ret <8 x i32> %res 2544 } 2545 2546 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { 2547 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask3: 2548 ; CHECK: # %bb.0: 2549 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2550 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] 2551 ; CHECK-NEXT: retq 2552 %vec = load <8 x i32>, <8 x i32>* %vp 2553 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 2554 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2555 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 2556 ret <8 x i32> %res 2557 } 2558 2559 define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) { 
; CHECK-LABEL: test_16xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
; CHECK-NEXT:    retq
  %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
  ret <16 x i32> %res
}

; Permutes every group of four i32 elements of %vec with the pattern <3,1,3,0>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Permutes every group of four i32 elements of %vec with the pattern <3,1,3,0>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}

; Permutes every group of four i32 elements of %vec with the pattern <2,0,3,0>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Permutes every group of four i32 elements of %vec with the pattern <2,0,3,0>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12]
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}

; Permutes every group of four i32 elements of %vec with the pattern <1,3,3,0>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Permutes every group of four i32 elements of %vec with the pattern <1,3,3,0>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12]
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}

; Unmasked case: permutes every group of four i32 elements of %vec with the
; pattern <3,2,0,3> and returns the result directly.
define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
  ret <16 x i32> %perm
}

; Permutes every group of four i32 elements of %vec with the pattern <3,2,0,3>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Permutes every group of four i32 elements of %vec with the pattern <3,2,0,3>.
; Lanes whose %mask element equals zero receive the permuted value; every other
; lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
; CHECK-NEXT:    retq
  %perm = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}

; Unmasked case with a memory source: loads the vector from %vp and permutes
; every group of four i32 elements with the pattern <1,0,1,3>. The expected
; assembly takes the shuffle source straight from memory.
define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
  ret <16 x i32> %perm
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <1,0,1,3>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <1,0,1,3>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <1,0,0,2>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <1,0,0,2>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <2,0,1,2>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <2,0,1,2>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}

; Unmasked case with a memory source: loads the vector from %vp and permutes
; every group of four i32 elements with the pattern <3,1,1,1>. The expected
; assembly takes the shuffle source straight from memory.
define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
  ret <16 x i32> %perm
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <3,1,1,1>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane keeps the corresponding element of %vec2.
define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %blend = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> %vec2
  ret <16 x i32> %blend
}

; Loads the vector from %vp and permutes every group of four i32 elements with
; the pattern <3,1,1,1>. Lanes whose %mask element equals zero receive the
; permuted value; every other lane is set to zero.
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
; CHECK-NEXT:    retq
  %src = load <16 x i32>, <16 x i32>* %vp
  %perm = shufflevector <16 x i32> %src, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
  %mask_is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %mask_is_zero, <16 x i32> %perm, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}
