1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX 4 5 ; This test is a collection of AVX-512 shuffle instructions, used to check the scheduling information reported for each of them. 6 7 define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) { 8 ; GENERIC-LABEL: test_16xi16_perm_mask0: 9 ; GENERIC: # %bb.0: 10 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 11 ; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 12 ; GENERIC-NEXT: retq # sched: [1:1.00] 13 ; 14 ; SKX-LABEL: test_16xi16_perm_mask0: 15 ; SKX: # %bb.0: 16 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 17 ; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] 18 ; SKX-NEXT: retq # sched: [7:1.00] 19 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 20 ret <16 x i16> %res 21 } 22 define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 23 ; GENERIC-LABEL: test_masked_16xi16_perm_mask0: 24 ; GENERIC: # %bb.0: 25 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 26 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 27 ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 28 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 29 ; GENERIC-NEXT: retq # sched: [1:1.00] 30 ; 31 ; SKX-LABEL: test_masked_16xi16_perm_mask0: 32 ; SKX: # %bb.0: 33 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 34 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 
35 ; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] 36 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 37 ; SKX-NEXT: retq # sched: [7:1.00] 38 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 39 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 40 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 41 ret <16 x i16> %res 42 } 43 44 define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %mask) { 45 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0: 46 ; GENERIC: # %bb.0: 47 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 48 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 49 ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 50 ; GENERIC-NEXT: retq # sched: [1:1.00] 51 ; 52 ; SKX-LABEL: test_masked_z_16xi16_perm_mask0: 53 ; SKX: # %bb.0: 54 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 55 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 56 ; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 57 ; SKX-NEXT: retq # sched: [7:1.00] 58 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 59 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 60 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 61 ret <16 x i16> %res 62 } 63 define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 64 ; GENERIC-LABEL: test_masked_16xi16_perm_mask1: 65 ; GENERIC: # %bb.0: 66 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 67 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 68 ; GENERIC-NEXT: 
vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 69 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 70 ; GENERIC-NEXT: retq # sched: [1:1.00] 71 ; 72 ; SKX-LABEL: test_masked_16xi16_perm_mask1: 73 ; SKX: # %bb.0: 74 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 75 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 76 ; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] 77 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 78 ; SKX-NEXT: retq # sched: [7:1.00] 79 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 80 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 81 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 82 ret <16 x i16> %res 83 } 84 85 define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %mask) { 86 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1: 87 ; GENERIC: # %bb.0: 88 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 89 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 90 ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 91 ; GENERIC-NEXT: retq # sched: [1:1.00] 92 ; 93 ; SKX-LABEL: test_masked_z_16xi16_perm_mask1: 94 ; SKX: # %bb.0: 95 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 96 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 97 ; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 98 ; SKX-NEXT: retq # sched: [7:1.00] 99 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 100 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 101 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 102 ret <16 
x i16> %res 103 } 104 define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 105 ; GENERIC-LABEL: test_masked_16xi16_perm_mask2: 106 ; GENERIC: # %bb.0: 107 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 108 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 109 ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 110 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 111 ; GENERIC-NEXT: retq # sched: [1:1.00] 112 ; 113 ; SKX-LABEL: test_masked_16xi16_perm_mask2: 114 ; SKX: # %bb.0: 115 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 116 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 117 ; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] 118 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 119 ; SKX-NEXT: retq # sched: [7:1.00] 120 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 121 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 122 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 123 ret <16 x i16> %res 124 } 125 126 define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %mask) { 127 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2: 128 ; GENERIC: # %bb.0: 129 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 130 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 131 ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 132 ; GENERIC-NEXT: retq # sched: [1:1.00] 133 ; 134 ; SKX-LABEL: test_masked_z_16xi16_perm_mask2: 135 ; SKX: # %bb.0: 136 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 137 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 138 ; SKX-NEXT: vpermw 
%ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 139 ; SKX-NEXT: retq # sched: [7:1.00] 140 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 141 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 142 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 143 ret <16 x i16> %res 144 } 145 define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) { 146 ; GENERIC-LABEL: test_16xi16_perm_mask3: 147 ; GENERIC: # %bb.0: 148 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 149 ; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 150 ; GENERIC-NEXT: retq # sched: [1:1.00] 151 ; 152 ; SKX-LABEL: test_16xi16_perm_mask3: 153 ; SKX: # %bb.0: 154 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 155 ; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] 156 ; SKX-NEXT: retq # sched: [7:1.00] 157 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 158 ret <16 x i16> %res 159 } 160 define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 161 ; GENERIC-LABEL: test_masked_16xi16_perm_mask3: 162 ; GENERIC: # %bb.0: 163 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 164 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 165 ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 166 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 167 ; GENERIC-NEXT: retq # sched: [1:1.00] 168 ; 169 ; SKX-LABEL: test_masked_16xi16_perm_mask3: 170 ; SKX: # %bb.0: 171 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 172 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # 
sched: [3:1.00] 173 ; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] 174 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 175 ; SKX-NEXT: retq # sched: [7:1.00] 176 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 177 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 178 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 179 ret <16 x i16> %res 180 } 181 182 define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %mask) { 183 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3: 184 ; GENERIC: # %bb.0: 185 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 186 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 187 ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 188 ; GENERIC-NEXT: retq # sched: [1:1.00] 189 ; 190 ; SKX-LABEL: test_masked_z_16xi16_perm_mask3: 191 ; SKX: # %bb.0: 192 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 193 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 194 ; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] 195 ; SKX-NEXT: retq # sched: [7:1.00] 196 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 197 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 198 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 199 ret <16 x i16> %res 200 } 201 define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) { 202 ; GENERIC-LABEL: test_16xi16_perm_mem_mask0: 203 ; GENERIC: # %bb.0: 204 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 205 ; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 206 ; GENERIC-NEXT: 
retq # sched: [1:1.00] 207 ; 208 ; SKX-LABEL: test_16xi16_perm_mem_mask0: 209 ; SKX: # %bb.0: 210 ; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 211 ; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 212 ; SKX-NEXT: retq # sched: [7:1.00] 213 %vec = load <16 x i16>, <16 x i16>* %vp 214 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 215 ret <16 x i16> %res 216 } 217 define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 218 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0: 219 ; GENERIC: # %bb.0: 220 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 221 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 222 ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 223 ; GENERIC-NEXT: retq # sched: [1:1.00] 224 ; 225 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask0: 226 ; SKX: # %bb.0: 227 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 228 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 229 ; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] 230 ; SKX-NEXT: retq # sched: [7:1.00] 231 %vec = load <16 x i16>, <16 x i16>* %vp 232 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 233 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 234 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 235 ret <16 x i16> %res 236 } 237 238 define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { 239 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0: 240 ; GENERIC: # %bb.0: 241 ; GENERIC-NEXT: vmovdqa {{.*#+}} 
ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 242 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 243 ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 244 ; GENERIC-NEXT: retq # sched: [1:1.00] 245 ; 246 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0: 247 ; SKX: # %bb.0: 248 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 249 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 250 ; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 251 ; SKX-NEXT: retq # sched: [7:1.00] 252 %vec = load <16 x i16>, <16 x i16>* %vp 253 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 254 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 255 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 256 ret <16 x i16> %res 257 } 258 259 define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 260 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1: 261 ; GENERIC: # %bb.0: 262 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 263 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 264 ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 265 ; GENERIC-NEXT: retq # sched: [1:1.00] 266 ; 267 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask1: 268 ; SKX: # %bb.0: 269 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 270 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 271 ; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] 272 ; SKX-NEXT: retq # sched: [7:1.00] 273 %vec = load <16 x i16>, <16 x i16>* %vp 274 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 
15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 275 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 276 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 277 ret <16 x i16> %res 278 } 279 280 define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { 281 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1: 282 ; GENERIC: # %bb.0: 283 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 284 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 285 ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 286 ; GENERIC-NEXT: retq # sched: [1:1.00] 287 ; 288 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1: 289 ; SKX: # %bb.0: 290 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 291 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 292 ; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 293 ; SKX-NEXT: retq # sched: [7:1.00] 294 %vec = load <16 x i16>, <16 x i16>* %vp 295 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 296 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 297 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 298 ret <16 x i16> %res 299 } 300 301 define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 302 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2: 303 ; GENERIC: # %bb.0: 304 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 305 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 306 ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 307 ; GENERIC-NEXT: retq # sched: [1:1.00] 308 ; 309 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask2: 310 ; SKX: # 
%bb.0: 311 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 312 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 313 ; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] 314 ; SKX-NEXT: retq # sched: [7:1.00] 315 %vec = load <16 x i16>, <16 x i16>* %vp 316 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 317 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 318 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 319 ret <16 x i16> %res 320 } 321 322 define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { 323 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2: 324 ; GENERIC: # %bb.0: 325 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 326 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 327 ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 328 ; GENERIC-NEXT: retq # sched: [1:1.00] 329 ; 330 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2: 331 ; SKX: # %bb.0: 332 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 333 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 334 ; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 335 ; SKX-NEXT: retq # sched: [7:1.00] 336 %vec = load <16 x i16>, <16 x i16>* %vp 337 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 338 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 339 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 340 ret <16 x i16> %res 341 } 342 343 define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) { 344 ; GENERIC-LABEL: test_16xi16_perm_mem_mask3: 345 
; GENERIC: # %bb.0: 346 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 347 ; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 348 ; GENERIC-NEXT: retq # sched: [1:1.00] 349 ; 350 ; SKX-LABEL: test_16xi16_perm_mem_mask3: 351 ; SKX: # %bb.0: 352 ; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 353 ; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 354 ; SKX-NEXT: retq # sched: [7:1.00] 355 %vec = load <16 x i16>, <16 x i16>* %vp 356 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 357 ret <16 x i16> %res 358 } 359 define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 360 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3: 361 ; GENERIC: # %bb.0: 362 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 363 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 364 ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 365 ; GENERIC-NEXT: retq # sched: [1:1.00] 366 ; 367 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask3: 368 ; SKX: # %bb.0: 369 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 370 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 371 ; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] 372 ; SKX-NEXT: retq # sched: [7:1.00] 373 %vec = load <16 x i16>, <16 x i16>* %vp 374 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 375 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 376 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 377 ret <16 x i16> %res 378 } 379 380 define <16 x i16> 
@test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { 381 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3: 382 ; GENERIC: # %bb.0: 383 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 384 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 385 ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 386 ; GENERIC-NEXT: retq # sched: [1:1.00] 387 ; 388 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3: 389 ; SKX: # %bb.0: 390 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 391 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 392 ; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] 393 ; SKX-NEXT: retq # sched: [7:1.00] 394 %vec = load <16 x i16>, <16 x i16>* %vp 395 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 396 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 397 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 398 ret <16 x i16> %res 399 } 400 401 define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) { 402 ; GENERIC-LABEL: test_32xi16_perm_mask0: 403 ; GENERIC: # %bb.0: 404 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] 405 ; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 406 ; GENERIC-NEXT: retq # sched: [1:1.00] 407 ; 408 ; SKX-LABEL: test_32xi16_perm_mask0: 409 ; SKX: # %bb.0: 410 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] 411 ; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] 412 ; SKX-NEXT: retq # sched: [7:1.00] 413 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, 
i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10> 414 ret <32 x i16> %res 415 } 416 define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 417 ; GENERIC-LABEL: test_masked_32xi16_perm_mask0: 418 ; GENERIC: # %bb.0: 419 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] 420 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 421 ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 422 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 423 ; GENERIC-NEXT: retq # sched: [1:1.00] 424 ; 425 ; SKX-LABEL: test_masked_32xi16_perm_mask0: 426 ; SKX: # %bb.0: 427 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] 428 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 429 ; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 430 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 431 ; SKX-NEXT: retq # sched: [7:1.00] 432 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10> 433 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 434 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 435 ret <32 x i16> %res 436 } 437 438 define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %mask) { 439 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0: 440 ; GENERIC: # %bb.0: 441 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] 442 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 443 ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 444 ; GENERIC-NEXT: retq # sched: [1:1.00] 445 ; 446 ; SKX-LABEL: test_masked_z_32xi16_perm_mask0: 447 ; SKX: # %bb.0: 448 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] 449 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 450 ; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 451 ; SKX-NEXT: retq # sched: [7:1.00] 452 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10> 453 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 454 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 455 ret <32 x i16> %res 456 } 457 define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 458 ; GENERIC-LABEL: test_masked_32xi16_perm_mask1: 459 ; GENERIC: # %bb.0: 460 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] 461 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 462 ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 463 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 464 ; GENERIC-NEXT: retq # sched: [1:1.00] 465 ; 466 ; SKX-LABEL: test_masked_32xi16_perm_mask1: 467 ; SKX: # %bb.0: 468 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] 469 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, 
%k1 # sched: [3:1.00] 470 ; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 471 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 472 ; SKX-NEXT: retq # sched: [7:1.00] 473 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16> 474 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 475 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 476 ret <32 x i16> %res 477 } 478 479 define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %mask) { 480 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1: 481 ; GENERIC: # %bb.0: 482 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] 483 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 484 ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 485 ; GENERIC-NEXT: retq # sched: [1:1.00] 486 ; 487 ; SKX-LABEL: test_masked_z_32xi16_perm_mask1: 488 ; SKX: # %bb.0: 489 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] 490 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 491 ; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 492 ; SKX-NEXT: retq # sched: [7:1.00] 493 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16> 494 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 495 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> 
zeroinitializer 496 ret <32 x i16> %res 497 } 498 define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 499 ; GENERIC-LABEL: test_masked_32xi16_perm_mask2: 500 ; GENERIC: # %bb.0: 501 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] 502 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 503 ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 504 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 505 ; GENERIC-NEXT: retq # sched: [1:1.00] 506 ; 507 ; SKX-LABEL: test_masked_32xi16_perm_mask2: 508 ; SKX: # %bb.0: 509 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] 510 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 511 ; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 512 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 513 ; SKX-NEXT: retq # sched: [7:1.00] 514 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27> 515 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 516 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 517 ret <32 x i16> %res 518 } 519 520 define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %mask) { 521 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2: 522 ; GENERIC: # %bb.0: 523 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] 524 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 525 ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: 
[1:1.00] 526 ; GENERIC-NEXT: retq # sched: [1:1.00] 527 ; 528 ; SKX-LABEL: test_masked_z_32xi16_perm_mask2: 529 ; SKX: # %bb.0: 530 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] 531 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 532 ; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 533 ; SKX-NEXT: retq # sched: [7:1.00] 534 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27> 535 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 536 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 537 ret <32 x i16> %res 538 } 539 define <32 x i16> @test_32xi16_perm_mask3(<32 x i16> %vec) { 540 ; GENERIC-LABEL: test_32xi16_perm_mask3: 541 ; GENERIC: # %bb.0: 542 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] 543 ; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 544 ; GENERIC-NEXT: retq # sched: [1:1.00] 545 ; 546 ; SKX-LABEL: test_32xi16_perm_mask3: 547 ; SKX: # %bb.0: 548 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] 549 ; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] 550 ; SKX-NEXT: retq # sched: [7:1.00] 551 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4> 552 ret <32 x i16> %res 553 } 554 define <32 x 
i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 555 ; GENERIC-LABEL: test_masked_32xi16_perm_mask3: 556 ; GENERIC: # %bb.0: 557 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] 558 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 559 ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 560 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 561 ; GENERIC-NEXT: retq # sched: [1:1.00] 562 ; 563 ; SKX-LABEL: test_masked_32xi16_perm_mask3: 564 ; SKX: # %bb.0: 565 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] 566 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 567 ; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] 568 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 569 ; SKX-NEXT: retq # sched: [7:1.00] 570 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4> 571 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 572 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 573 ret <32 x i16> %res 574 } 575 576 define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %mask) { 577 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3: 578 ; GENERIC: # %bb.0: 579 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] 580 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 581 ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 582 ; GENERIC-NEXT: retq # sched: [1:1.00] 583 ; 584 ; 
SKX-LABEL: test_masked_z_32xi16_perm_mask3: 585 ; SKX: # %bb.0: 586 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] 587 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 588 ; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] 589 ; SKX-NEXT: retq # sched: [7:1.00] 590 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4> 591 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 592 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 593 ret <32 x i16> %res 594 } 595 define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) { 596 ; GENERIC-LABEL: test_32xi16_perm_mem_mask0: 597 ; GENERIC: # %bb.0: 598 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] 599 ; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 600 ; GENERIC-NEXT: retq # sched: [1:1.00] 601 ; 602 ; SKX-LABEL: test_32xi16_perm_mem_mask0: 603 ; SKX: # %bb.0: 604 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] 605 ; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] 606 ; SKX-NEXT: retq # sched: [7:1.00] 607 %vec = load <32 x i16>, <32 x i16>* %vp 608 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12> 609 ret <32 x i16> %res 610 } 611 define <32 x i16> 
@test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 612 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0: 613 ; GENERIC: # %bb.0: 614 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] 615 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 616 ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 617 ; GENERIC-NEXT: retq # sched: [1:1.00] 618 ; 619 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask0: 620 ; SKX: # %bb.0: 621 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] 622 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 623 ; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 624 ; SKX-NEXT: retq # sched: [7:1.00] 625 %vec = load <32 x i16>, <32 x i16>* %vp 626 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12> 627 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 628 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 629 ret <32 x i16> %res 630 } 631 632 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { 633 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0: 634 ; GENERIC: # %bb.0: 635 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] 636 ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 637 ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 638 ; GENERIC-NEXT: retq # sched: [1:1.00] 639 ; 640 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0: 641 ; SKX: # 
%bb.0: 642 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] 643 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 644 ; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 645 ; SKX-NEXT: retq # sched: [7:1.00] 646 %vec = load <32 x i16>, <32 x i16>* %vp 647 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12> 648 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 649 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 650 ret <32 x i16> %res 651 } 652 653 define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 654 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1: 655 ; GENERIC: # %bb.0: 656 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] 657 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 658 ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 659 ; GENERIC-NEXT: retq # sched: [1:1.00] 660 ; 661 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask1: 662 ; SKX: # %bb.0: 663 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] 664 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 665 ; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 666 ; SKX-NEXT: retq # sched: [7:1.00] 667 %vec = load <32 x i16>, <32 x i16>* %vp 668 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, 
i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6> 669 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 670 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 671 ret <32 x i16> %res 672 } 673 674 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { 675 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1: 676 ; GENERIC: # %bb.0: 677 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] 678 ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 679 ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 680 ; GENERIC-NEXT: retq # sched: [1:1.00] 681 ; 682 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1: 683 ; SKX: # %bb.0: 684 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] 685 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] 686 ; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 687 ; SKX-NEXT: retq # sched: [7:1.00] 688 %vec = load <32 x i16>, <32 x i16>* %vp 689 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6> 690 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 691 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 692 ret <32 x i16> %res 693 } 694 695 define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 696 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2: 697 ; GENERIC: # %bb.0: 698 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] 699 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 700 ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 701 ; GENERIC-NEXT: retq # sched: [1:1.00] 702 ; 703 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask2: 704 ; SKX: # %bb.0: 705 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] 706 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 707 ; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 708 ; SKX-NEXT: retq # sched: [7:1.00] 709 %vec = load <32 x i16>, <32 x i16>* %vp 710 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25> 711 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 712 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 713 ret <32 x i16> %res 714 } 715 716 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { 717 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2: 718 ; GENERIC: # %bb.0: 719 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] 720 ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 721 ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 722 ; GENERIC-NEXT: retq # sched: [1:1.00] 723 ; 724 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2: 725 ; SKX: # %bb.0: 726 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] 727 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: 
[3:1.00] 728 ; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 729 ; SKX-NEXT: retq # sched: [7:1.00] 730 %vec = load <32 x i16>, <32 x i16>* %vp 731 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25> 732 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 733 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 734 ret <32 x i16> %res 735 } 736 737 define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) { 738 ; GENERIC-LABEL: test_32xi16_perm_mem_mask3: 739 ; GENERIC: # %bb.0: 740 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] 741 ; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 742 ; GENERIC-NEXT: retq # sched: [1:1.00] 743 ; 744 ; SKX-LABEL: test_32xi16_perm_mem_mask3: 745 ; SKX: # %bb.0: 746 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] 747 ; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] 748 ; SKX-NEXT: retq # sched: [7:1.00] 749 %vec = load <32 x i16>, <32 x i16>* %vp 750 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27> 751 ret <32 x i16> %res 752 } 753 define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { 754 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3: 755 ; GENERIC: # %bb.0: 756 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} 
zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] 757 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 758 ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 759 ; GENERIC-NEXT: retq # sched: [1:1.00] 760 ; 761 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask3: 762 ; SKX: # %bb.0: 763 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] 764 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 765 ; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] 766 ; SKX-NEXT: retq # sched: [7:1.00] 767 %vec = load <32 x i16>, <32 x i16>* %vp 768 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27> 769 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 770 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 771 ret <32 x i16> %res 772 } 773 774 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { 775 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3: 776 ; GENERIC: # %bb.0: 777 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] 778 ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] 779 ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 780 ; GENERIC-NEXT: retq # sched: [1:1.00] 781 ; 782 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3: 783 ; SKX: # %bb.0: 784 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] 785 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # 
sched: [3:1.00] 786 ; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] 787 ; SKX-NEXT: retq # sched: [7:1.00] 788 %vec = load <32 x i16>, <32 x i16>* %vp 789 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27> 790 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 791 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 792 ret <32 x i16> %res 793 } 794 795 define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) { 796 ; GENERIC-LABEL: test_8xi32_perm_mask0: 797 ; GENERIC: # %bb.0: 798 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 799 ; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 800 ; GENERIC-NEXT: retq # sched: [1:1.00] 801 ; 802 ; SKX-LABEL: test_8xi32_perm_mask0: 803 ; SKX: # %bb.0: 804 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 805 ; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 806 ; SKX-NEXT: retq # sched: [7:1.00] 807 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6> 808 ret <8 x i32> %res 809 } 810 define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 811 ; GENERIC-LABEL: test_masked_8xi32_perm_mask0: 812 ; GENERIC: # %bb.0: 813 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 814 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 815 ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 816 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 817 ; GENERIC-NEXT: retq # sched: [1:1.00] 818 ; 819 ; SKX-LABEL: test_masked_8xi32_perm_mask0: 820 ; SKX: # %bb.0: 821 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] 
sched: [7:0.50] 822 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 823 ; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 824 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 825 ; SKX-NEXT: retq # sched: [7:1.00] 826 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6> 827 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 828 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 829 ret <8 x i32> %res 830 } 831 832 define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { 833 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0: 834 ; GENERIC: # %bb.0: 835 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 836 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 837 ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 838 ; GENERIC-NEXT: retq # sched: [1:1.00] 839 ; 840 ; SKX-LABEL: test_masked_z_8xi32_perm_mask0: 841 ; SKX: # %bb.0: 842 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] 843 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 844 ; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 845 ; SKX-NEXT: retq # sched: [7:1.00] 846 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6> 847 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 848 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 849 ret <8 x i32> %res 850 } 851 define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 852 ; GENERIC-LABEL: test_masked_8xi32_perm_mask1: 853 ; GENERIC: # %bb.0: 854 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 855 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 856 ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 857 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 
858 ; GENERIC-NEXT: retq # sched: [1:1.00] 859 ; 860 ; SKX-LABEL: test_masked_8xi32_perm_mask1: 861 ; SKX: # %bb.0: 862 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 863 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 864 ; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 865 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 866 ; SKX-NEXT: retq # sched: [7:1.00] 867 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3> 868 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 869 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 870 ret <8 x i32> %res 871 } 872 873 define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { 874 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1: 875 ; GENERIC: # %bb.0: 876 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 877 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 878 ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 879 ; GENERIC-NEXT: retq # sched: [1:1.00] 880 ; 881 ; SKX-LABEL: test_masked_z_8xi32_perm_mask1: 882 ; SKX: # %bb.0: 883 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] 884 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 885 ; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 886 ; SKX-NEXT: retq # sched: [7:1.00] 887 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3> 888 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 889 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 890 ret <8 x i32> %res 891 } 892 define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 893 ; GENERIC-LABEL: test_masked_8xi32_perm_mask2: 894 ; GENERIC: # %bb.0: 895 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 896 ; GENERIC-NEXT: 
vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 897 ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 898 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 899 ; GENERIC-NEXT: retq # sched: [1:1.00] 900 ; 901 ; SKX-LABEL: test_masked_8xi32_perm_mask2: 902 ; SKX: # %bb.0: 903 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 904 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 905 ; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 906 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 907 ; SKX-NEXT: retq # sched: [7:1.00] 908 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4> 909 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 910 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 911 ret <8 x i32> %res 912 } 913 914 define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { 915 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2: 916 ; GENERIC: # %bb.0: 917 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 918 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 919 ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 920 ; GENERIC-NEXT: retq # sched: [1:1.00] 921 ; 922 ; SKX-LABEL: test_masked_z_8xi32_perm_mask2: 923 ; SKX: # %bb.0: 924 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] 925 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 926 ; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 927 ; SKX-NEXT: retq # sched: [7:1.00] 928 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4> 929 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 930 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 931 ret <8 x i32> %res 932 } 933 define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { 934 ; GENERIC-LABEL: 
test_8xi32_perm_mask3: 935 ; GENERIC: # %bb.0: 936 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 937 ; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 938 ; GENERIC-NEXT: retq # sched: [1:1.00] 939 ; 940 ; SKX-LABEL: test_8xi32_perm_mask3: 941 ; SKX: # %bb.0: 942 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 943 ; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 944 ; SKX-NEXT: retq # sched: [7:1.00] 945 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0> 946 ret <8 x i32> %res 947 } 948 define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 949 ; GENERIC-LABEL: test_masked_8xi32_perm_mask3: 950 ; GENERIC: # %bb.0: 951 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 952 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 953 ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 954 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 955 ; GENERIC-NEXT: retq # sched: [1:1.00] 956 ; 957 ; SKX-LABEL: test_masked_8xi32_perm_mask3: 958 ; SKX: # %bb.0: 959 ; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 960 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 961 ; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 962 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 963 ; SKX-NEXT: retq # sched: [7:1.00] 964 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0> 965 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 966 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 967 ret <8 x i32> %res 968 } 969 970 define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { 971 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3: 972 ; GENERIC: # %bb.0: 973 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] 
sched: [7:0.50] 974 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 975 ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 976 ; GENERIC-NEXT: retq # sched: [1:1.00] 977 ; 978 ; SKX-LABEL: test_masked_z_8xi32_perm_mask3: 979 ; SKX: # %bb.0: 980 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] 981 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 982 ; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 983 ; SKX-NEXT: retq # sched: [7:1.00] 984 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0> 985 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 986 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 987 ret <8 x i32> %res 988 } 989 define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { 990 ; GENERIC-LABEL: test_8xi32_perm_mem_mask0: 991 ; GENERIC: # %bb.0: 992 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 993 ; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 994 ; GENERIC-NEXT: retq # sched: [1:1.00] 995 ; 996 ; SKX-LABEL: test_8xi32_perm_mem_mask0: 997 ; SKX: # %bb.0: 998 ; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 999 ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 1000 ; SKX-NEXT: retq # sched: [7:1.00] 1001 %vec = load <8 x i32>, <8 x i32>* %vp 1002 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 1003 ret <8 x i32> %res 1004 } 1005 define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1006 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0: 1007 ; GENERIC: # %bb.0: 1008 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1009 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1010 ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1011 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 1012 ; 1013 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask0: 1014 ; SKX: # %bb.0: 1015 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1016 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1017 ; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1018 ; SKX-NEXT: retq # sched: [7:1.00] 1019 %vec = load <8 x i32>, <8 x i32>* %vp 1020 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 1021 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1022 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 1023 ret <8 x i32> %res 1024 } 1025 1026 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { 1027 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0: 1028 ; GENERIC: # %bb.0: 1029 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1030 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1031 ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1032 ; GENERIC-NEXT: retq # sched: [1:1.00] 1033 ; 1034 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0: 1035 ; SKX: # %bb.0: 1036 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] 1037 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1038 ; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1039 ; SKX-NEXT: retq # sched: [7:1.00] 1040 %vec = load <8 x i32>, <8 x i32>* %vp 1041 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5> 1042 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1043 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1044 ret <8 x i32> %res 1045 } 1046 1047 define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1048 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1: 1049 ; GENERIC: # %bb.0: 1050 ; 
GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1051 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1052 ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1053 ; GENERIC-NEXT: retq # sched: [1:1.00] 1054 ; 1055 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask1: 1056 ; SKX: # %bb.0: 1057 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1058 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1059 ; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1060 ; SKX-NEXT: retq # sched: [7:1.00] 1061 %vec = load <8 x i32>, <8 x i32>* %vp 1062 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5> 1063 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1064 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 1065 ret <8 x i32> %res 1066 } 1067 1068 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { 1069 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1: 1070 ; GENERIC: # %bb.0: 1071 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1072 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1073 ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1074 ; GENERIC-NEXT: retq # sched: [1:1.00] 1075 ; 1076 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1: 1077 ; SKX: # %bb.0: 1078 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] 1079 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1080 ; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1081 ; SKX-NEXT: retq # sched: [7:1.00] 1082 %vec = load <8 x i32>, <8 x i32>* %vp 1083 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5> 1084 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1085 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1086 ret <8 
x i32> %res 1087 } 1088 1089 define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1090 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2: 1091 ; GENERIC: # %bb.0: 1092 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1093 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1094 ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1095 ; GENERIC-NEXT: retq # sched: [1:1.00] 1096 ; 1097 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask2: 1098 ; SKX: # %bb.0: 1099 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1100 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1101 ; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1102 ; SKX-NEXT: retq # sched: [7:1.00] 1103 %vec = load <8 x i32>, <8 x i32>* %vp 1104 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3> 1105 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1106 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 1107 ret <8 x i32> %res 1108 } 1109 1110 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { 1111 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2: 1112 ; GENERIC: # %bb.0: 1113 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1114 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1115 ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1116 ; GENERIC-NEXT: retq # sched: [1:1.00] 1117 ; 1118 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2: 1119 ; SKX: # %bb.0: 1120 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] 1121 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1122 ; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1123 ; SKX-NEXT: retq # sched: [7:1.00] 1124 %vec = load <8 x i32>, <8 x i32>* %vp 1125 %shuf = shufflevector <8 x i32> %vec, <8 x 
i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3> 1126 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1127 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1128 ret <8 x i32> %res 1129 } 1130 1131 define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { 1132 ; GENERIC-LABEL: test_8xi32_perm_mem_mask3: 1133 ; GENERIC: # %bb.0: 1134 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1135 ; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 1136 ; GENERIC-NEXT: retq # sched: [1:1.00] 1137 ; 1138 ; SKX-LABEL: test_8xi32_perm_mem_mask3: 1139 ; SKX: # %bb.0: 1140 ; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1141 ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 1142 ; SKX-NEXT: retq # sched: [7:1.00] 1143 %vec = load <8 x i32>, <8 x i32>* %vp 1144 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 1145 ret <8 x i32> %res 1146 } 1147 define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 1148 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3: 1149 ; GENERIC: # %bb.0: 1150 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1151 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 1152 ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 1153 ; GENERIC-NEXT: retq # sched: [1:1.00] 1154 ; 1155 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask3: 1156 ; SKX: # %bb.0: 1157 ; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1158 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 1159 ; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 1160 ; SKX-NEXT: retq # sched: [7:1.00] 1161 %vec = load <8 x i32>, <8 x i32>* %vp 1162 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 1163 %cmp = icmp eq 
<8 x i32> %mask, zeroinitializer 1164 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 1165 ret <8 x i32> %res 1166 } 1167 1168 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { 1169 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3: 1170 ; GENERIC: # %bb.0: 1171 ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1172 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 1173 ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 1174 ; GENERIC-NEXT: retq # sched: [1:1.00] 1175 ; 1176 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3: 1177 ; SKX: # %bb.0: 1178 ; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] 1179 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 1180 ; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 1181 ; SKX-NEXT: retq # sched: [7:1.00] 1182 %vec = load <8 x i32>, <8 x i32>* %vp 1183 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5> 1184 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 1185 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 1186 ret <8 x i32> %res 1187 } 1188 1189 define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 1190 ; GENERIC-LABEL: test_16xi32_perm_mask0: 1191 ; GENERIC: # %bb.0: 1192 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] 1193 ; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 1194 ; GENERIC-NEXT: retq # sched: [1:1.00] 1195 ; 1196 ; SKX-LABEL: test_16xi32_perm_mask0: 1197 ; SKX: # %bb.0: 1198 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] 1199 ; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 1200 ; SKX-NEXT: retq # sched: [7:1.00] 1201 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, 
i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 1202 ret <16 x i32> %res 1203 } 1204 define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1205 ; GENERIC-LABEL: test_masked_16xi32_perm_mask0: 1206 ; GENERIC: # %bb.0: 1207 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] 1208 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1209 ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1210 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1211 ; GENERIC-NEXT: retq # sched: [1:1.00] 1212 ; 1213 ; SKX-LABEL: test_masked_16xi32_perm_mask0: 1214 ; SKX: # %bb.0: 1215 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] 1216 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1217 ; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1218 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1219 ; SKX-NEXT: retq # sched: [7:1.00] 1220 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 1221 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1222 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1223 ret <16 x i32> %res 1224 } 1225 1226 define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 1227 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0: 1228 ; GENERIC: # %bb.0: 1229 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] 1230 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1231 ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1232 ; GENERIC-NEXT: retq # sched: [1:1.00] 1233 ; 1234 ; SKX-LABEL: test_masked_z_16xi32_perm_mask0: 1235 ; SKX: # %bb.0: 1236 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] 1237 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1238 ; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1239 ; SKX-NEXT: retq # sched: [7:1.00] 1240 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7> 1241 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1242 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1243 ret <16 x i32> %res 1244 } 1245 define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1246 ; GENERIC-LABEL: test_masked_16xi32_perm_mask1: 1247 ; GENERIC: # %bb.0: 1248 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] 1249 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1250 ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1251 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1252 ; GENERIC-NEXT: retq # sched: [1:1.00] 1253 ; 1254 ; SKX-LABEL: test_masked_16xi32_perm_mask1: 1255 ; SKX: # %bb.0: 1256 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] 1257 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1258 ; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1259 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1260 ; SKX-NEXT: retq # sched: [7:1.00] 1261 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3> 1262 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1263 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1264 ret <16 x i32> %res 1265 } 1266 1267 define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { 1268 ; 
GENERIC-LABEL: test_masked_z_16xi32_perm_mask1: 1269 ; GENERIC: # %bb.0: 1270 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] 1271 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1272 ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1273 ; GENERIC-NEXT: retq # sched: [1:1.00] 1274 ; 1275 ; SKX-LABEL: test_masked_z_16xi32_perm_mask1: 1276 ; SKX: # %bb.0: 1277 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] 1278 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1279 ; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1280 ; SKX-NEXT: retq # sched: [7:1.00] 1281 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3> 1282 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1283 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1284 ret <16 x i32> %res 1285 } 1286 define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1287 ; GENERIC-LABEL: test_masked_16xi32_perm_mask2: 1288 ; GENERIC: # %bb.0: 1289 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] 1290 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1291 ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1292 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1293 ; GENERIC-NEXT: retq # sched: [1:1.00] 1294 ; 1295 ; SKX-LABEL: test_masked_16xi32_perm_mask2: 1296 ; SKX: # %bb.0: 1297 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] 1298 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1299 ; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1300 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1301 ; SKX-NEXT: retq # 
sched: [7:1.00] 1302 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5> 1303 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1304 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1305 ret <16 x i32> %res 1306 } 1307 1308 define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { 1309 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2: 1310 ; GENERIC: # %bb.0: 1311 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] 1312 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1313 ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1314 ; GENERIC-NEXT: retq # sched: [1:1.00] 1315 ; 1316 ; SKX-LABEL: test_masked_z_16xi32_perm_mask2: 1317 ; SKX: # %bb.0: 1318 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] 1319 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1320 ; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1321 ; SKX-NEXT: retq # sched: [7:1.00] 1322 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5> 1323 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1324 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1325 ret <16 x i32> %res 1326 } 1327 define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) { 1328 ; GENERIC-LABEL: test_16xi32_perm_mask3: 1329 ; GENERIC: # %bb.0: 1330 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] 1331 ; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 1332 ; GENERIC-NEXT: retq # sched: [1:1.00] 1333 ; 1334 ; SKX-LABEL: test_16xi32_perm_mask3: 1335 ; SKX: # %bb.0: 1336 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 
= [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] 1337 ; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 1338 ; SKX-NEXT: retq # sched: [7:1.00] 1339 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 1340 ret <16 x i32> %res 1341 } 1342 define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 1343 ; GENERIC-LABEL: test_masked_16xi32_perm_mask3: 1344 ; GENERIC: # %bb.0: 1345 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] 1346 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 1347 ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1348 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1349 ; GENERIC-NEXT: retq # sched: [1:1.00] 1350 ; 1351 ; SKX-LABEL: test_masked_16xi32_perm_mask3: 1352 ; SKX: # %bb.0: 1353 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] 1354 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 1355 ; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1356 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1357 ; SKX-NEXT: retq # sched: [7:1.00] 1358 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 1359 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1360 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1361 ret <16 x i32> %res 1362 } 1363 1364 define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { 1365 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3: 1366 ; GENERIC: # %bb.0: 1367 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] 1368 ; GENERIC-NEXT: vptestnmd 
%zmm1, %zmm1, %k1 # sched: [1:0.33] 1369 ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1370 ; GENERIC-NEXT: retq # sched: [1:1.00] 1371 ; 1372 ; SKX-LABEL: test_masked_z_16xi32_perm_mask3: 1373 ; SKX: # %bb.0: 1374 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] 1375 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1376 ; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1377 ; SKX-NEXT: retq # sched: [7:1.00] 1378 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12> 1379 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1380 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1381 ret <16 x i32> %res 1382 } 1383 define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { 1384 ; GENERIC-LABEL: test_16xi32_perm_mem_mask0: 1385 ; GENERIC: # %bb.0: 1386 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] 1387 ; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 1388 ; GENERIC-NEXT: retq # sched: [1:1.00] 1389 ; 1390 ; SKX-LABEL: test_16xi32_perm_mem_mask0: 1391 ; SKX: # %bb.0: 1392 ; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] 1393 ; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 1394 ; SKX-NEXT: retq # sched: [7:1.00] 1395 %vec = load <16 x i32>, <16 x i32>* %vp 1396 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 1397 ret <16 x i32> %res 1398 } 1399 define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1400 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0: 1401 ; GENERIC: # %bb.0: 1402 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} 
zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] 1403 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1404 ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1405 ; GENERIC-NEXT: retq # sched: [1:1.00] 1406 ; 1407 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask0: 1408 ; SKX: # %bb.0: 1409 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] 1410 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1411 ; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1412 ; SKX-NEXT: retq # sched: [7:1.00] 1413 %vec = load <16 x i32>, <16 x i32>* %vp 1414 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 1415 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1416 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1417 ret <16 x i32> %res 1418 } 1419 1420 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { 1421 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0: 1422 ; GENERIC: # %bb.0: 1423 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] 1424 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1425 ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1426 ; GENERIC-NEXT: retq # sched: [1:1.00] 1427 ; 1428 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0: 1429 ; SKX: # %bb.0: 1430 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] 1431 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 1432 ; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1433 ; SKX-NEXT: retq # sched: [7:1.00] 1434 %vec = load <16 x i32>, <16 x i32>* %vp 1435 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, 
i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6> 1436 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1437 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1438 ret <16 x i32> %res 1439 } 1440 1441 define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1442 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1: 1443 ; GENERIC: # %bb.0: 1444 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] 1445 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1446 ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1447 ; GENERIC-NEXT: retq # sched: [1:1.00] 1448 ; 1449 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask1: 1450 ; SKX: # %bb.0: 1451 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] 1452 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1453 ; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1454 ; SKX-NEXT: retq # sched: [7:1.00] 1455 %vec = load <16 x i32>, <16 x i32>* %vp 1456 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3> 1457 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1458 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1459 ret <16 x i32> %res 1460 } 1461 1462 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { 1463 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1: 1464 ; GENERIC: # %bb.0: 1465 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] 1466 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1467 ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1468 ; GENERIC-NEXT: retq # sched: [1:1.00] 1469 ; 1470 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1: 1471 ; SKX: 
# %bb.0: 1472 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] 1473 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 1474 ; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1475 ; SKX-NEXT: retq # sched: [7:1.00] 1476 %vec = load <16 x i32>, <16 x i32>* %vp 1477 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3> 1478 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1479 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1480 ret <16 x i32> %res 1481 } 1482 1483 define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1484 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2: 1485 ; GENERIC: # %bb.0: 1486 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] 1487 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1488 ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1489 ; GENERIC-NEXT: retq # sched: [1:1.00] 1490 ; 1491 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask2: 1492 ; SKX: # %bb.0: 1493 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] 1494 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1495 ; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1496 ; SKX-NEXT: retq # sched: [7:1.00] 1497 %vec = load <16 x i32>, <16 x i32>* %vp 1498 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2> 1499 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1500 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1501 ret <16 x i32> %res 1502 } 1503 1504 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, 
<16 x i32> %mask) { 1505 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2: 1506 ; GENERIC: # %bb.0: 1507 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] 1508 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1509 ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1510 ; GENERIC-NEXT: retq # sched: [1:1.00] 1511 ; 1512 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2: 1513 ; SKX: # %bb.0: 1514 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] 1515 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 1516 ; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1517 ; SKX-NEXT: retq # sched: [7:1.00] 1518 %vec = load <16 x i32>, <16 x i32>* %vp 1519 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2> 1520 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1521 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1522 ret <16 x i32> %res 1523 } 1524 1525 define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { 1526 ; GENERIC-LABEL: test_16xi32_perm_mem_mask3: 1527 ; GENERIC: # %bb.0: 1528 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] 1529 ; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 1530 ; GENERIC-NEXT: retq # sched: [1:1.00] 1531 ; 1532 ; SKX-LABEL: test_16xi32_perm_mem_mask3: 1533 ; SKX: # %bb.0: 1534 ; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] 1535 ; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 1536 ; SKX-NEXT: retq # sched: [7:1.00] 1537 %vec = load <16 x i32>, <16 x i32>* %vp 1538 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 
4, i32 8, i32 9, i32 6, i32 1> 1539 ret <16 x i32> %res 1540 } 1541 define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 1542 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3: 1543 ; GENERIC: # %bb.0: 1544 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] 1545 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 1546 ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 1547 ; GENERIC-NEXT: retq # sched: [1:1.00] 1548 ; 1549 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask3: 1550 ; SKX: # %bb.0: 1551 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] 1552 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 1553 ; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 1554 ; SKX-NEXT: retq # sched: [7:1.00] 1555 %vec = load <16 x i32>, <16 x i32>* %vp 1556 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 1557 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1558 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 1559 ret <16 x i32> %res 1560 } 1561 1562 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { 1563 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3: 1564 ; GENERIC: # %bb.0: 1565 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] 1566 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 1567 ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 1568 ; GENERIC-NEXT: retq # sched: [1:1.00] 1569 ; 1570 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3: 1571 ; SKX: # %bb.0: 1572 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] 1573 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # 
sched: [3:1.00] 1574 ; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 1575 ; SKX-NEXT: retq # sched: [7:1.00] 1576 %vec = load <16 x i32>, <16 x i32>* %vp 1577 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1> 1578 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1579 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1580 ret <16 x i32> %res 1581 } 1582 1583 define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) { 1584 ; GENERIC-LABEL: test_4xi64_perm_mask0: 1585 ; GENERIC: # %bb.0: 1586 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [1:1.00] 1587 ; GENERIC-NEXT: retq # sched: [1:1.00] 1588 ; 1589 ; SKX-LABEL: test_4xi64_perm_mask0: 1590 ; SKX: # %bb.0: 1591 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [3:1.00] 1592 ; SKX-NEXT: retq # sched: [7:1.00] 1593 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1594 ret <4 x i64> %res 1595 } 1596 define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1597 ; GENERIC-LABEL: test_masked_4xi64_perm_mask0: 1598 ; GENERIC: # %bb.0: 1599 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1600 ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00] 1601 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1602 ; GENERIC-NEXT: retq # sched: [1:1.00] 1603 ; 1604 ; SKX-LABEL: test_masked_4xi64_perm_mask0: 1605 ; SKX: # %bb.0: 1606 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1607 ; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [3:1.00] 1608 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1609 ; SKX-NEXT: retq # sched: [7:1.00] 1610 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1611 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1612 
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1613 ret <4 x i64> %res 1614 } 1615 1616 define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) { 1617 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0: 1618 ; GENERIC: # %bb.0: 1619 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1620 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00] 1621 ; GENERIC-NEXT: retq # sched: [1:1.00] 1622 ; 1623 ; SKX-LABEL: test_masked_z_4xi64_perm_mask0: 1624 ; SKX: # %bb.0: 1625 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1626 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [3:1.00] 1627 ; SKX-NEXT: retq # sched: [7:1.00] 1628 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1> 1629 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1630 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1631 ret <4 x i64> %res 1632 } 1633 define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1634 ; GENERIC-LABEL: test_masked_4xi64_perm_mask1: 1635 ; GENERIC: # %bb.0: 1636 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1637 ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00] 1638 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1639 ; GENERIC-NEXT: retq # sched: [1:1.00] 1640 ; 1641 ; SKX-LABEL: test_masked_4xi64_perm_mask1: 1642 ; SKX: # %bb.0: 1643 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1644 ; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [3:1.00] 1645 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1646 ; SKX-NEXT: retq # sched: [7:1.00] 1647 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3> 1648 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1649 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1650 ret <4 x i64> %res 1651 } 1652 1653 define <4 x 
i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) { 1654 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1: 1655 ; GENERIC: # %bb.0: 1656 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1657 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00] 1658 ; GENERIC-NEXT: retq # sched: [1:1.00] 1659 ; 1660 ; SKX-LABEL: test_masked_z_4xi64_perm_mask1: 1661 ; SKX: # %bb.0: 1662 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1663 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [3:1.00] 1664 ; SKX-NEXT: retq # sched: [7:1.00] 1665 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3> 1666 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1667 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1668 ret <4 x i64> %res 1669 } 1670 define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1671 ; GENERIC-LABEL: test_masked_4xi64_perm_mask2: 1672 ; GENERIC: # %bb.0: 1673 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1674 ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00] 1675 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1676 ; GENERIC-NEXT: retq # sched: [1:1.00] 1677 ; 1678 ; SKX-LABEL: test_masked_4xi64_perm_mask2: 1679 ; SKX: # %bb.0: 1680 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1681 ; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [3:1.00] 1682 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1683 ; SKX-NEXT: retq # sched: [7:1.00] 1684 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1> 1685 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1686 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1687 ret <4 x i64> %res 1688 } 1689 1690 define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) { 1691 ; GENERIC-LABEL: 
test_masked_z_4xi64_perm_mask2: 1692 ; GENERIC: # %bb.0: 1693 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1694 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00] 1695 ; GENERIC-NEXT: retq # sched: [1:1.00] 1696 ; 1697 ; SKX-LABEL: test_masked_z_4xi64_perm_mask2: 1698 ; SKX: # %bb.0: 1699 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1700 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [3:1.00] 1701 ; SKX-NEXT: retq # sched: [7:1.00] 1702 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1> 1703 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1704 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1705 ret <4 x i64> %res 1706 } 1707 define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) { 1708 ; GENERIC-LABEL: test_4xi64_perm_mask3: 1709 ; GENERIC: # %bb.0: 1710 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [1:1.00] 1711 ; GENERIC-NEXT: retq # sched: [1:1.00] 1712 ; 1713 ; SKX-LABEL: test_4xi64_perm_mask3: 1714 ; SKX: # %bb.0: 1715 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [3:1.00] 1716 ; SKX-NEXT: retq # sched: [7:1.00] 1717 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1718 ret <4 x i64> %res 1719 } 1720 define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { 1721 ; GENERIC-LABEL: test_masked_4xi64_perm_mask3: 1722 ; GENERIC: # %bb.0: 1723 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 1724 ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00] 1725 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 1726 ; GENERIC-NEXT: retq # sched: [1:1.00] 1727 ; 1728 ; SKX-LABEL: test_masked_4xi64_perm_mask3: 1729 ; SKX: # %bb.0: 1730 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 1731 ; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [3:1.00] 1732 ; SKX-NEXT: 
vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 1733 ; SKX-NEXT: retq # sched: [7:1.00] 1734 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1735 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1736 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1737 ret <4 x i64> %res 1738 } 1739 1740 define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) { 1741 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3: 1742 ; GENERIC: # %bb.0: 1743 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1744 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00] 1745 ; GENERIC-NEXT: retq # sched: [1:1.00] 1746 ; 1747 ; SKX-LABEL: test_masked_z_4xi64_perm_mask3: 1748 ; SKX: # %bb.0: 1749 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1750 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [3:1.00] 1751 ; SKX-NEXT: retq # sched: [7:1.00] 1752 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3> 1753 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1754 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1755 ret <4 x i64> %res 1756 } 1757 define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { 1758 ; GENERIC-LABEL: test_4xi64_perm_mem_mask0: 1759 ; GENERIC: # %bb.0: 1760 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [8:1.00] 1761 ; GENERIC-NEXT: retq # sched: [1:1.00] 1762 ; 1763 ; SKX-LABEL: test_4xi64_perm_mem_mask0: 1764 ; SKX: # %bb.0: 1765 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [10:1.00] 1766 ; SKX-NEXT: retq # sched: [7:1.00] 1767 %vec = load <4 x i64>, <4 x i64>* %vp 1768 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1769 ret <4 x i64> %res 1770 } 1771 define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1772 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0: 
1773 ; GENERIC: # %bb.0: 1774 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1775 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [8:1.00] 1776 ; GENERIC-NEXT: retq # sched: [1:1.00] 1777 ; 1778 ; SKX-LABEL: test_masked_4xi64_perm_mem_mask0: 1779 ; SKX: # %bb.0: 1780 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1781 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [10:1.00] 1782 ; SKX-NEXT: retq # sched: [7:1.00] 1783 %vec = load <4 x i64>, <4 x i64>* %vp 1784 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1785 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1786 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1787 ret <4 x i64> %res 1788 } 1789 1790 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) { 1791 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0: 1792 ; GENERIC: # %bb.0: 1793 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 1794 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [8:1.00] 1795 ; GENERIC-NEXT: retq # sched: [1:1.00] 1796 ; 1797 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0: 1798 ; SKX: # %bb.0: 1799 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1800 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [10:1.00] 1801 ; SKX-NEXT: retq # sched: [7:1.00] 1802 %vec = load <4 x i64>, <4 x i64>* %vp 1803 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0> 1804 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1805 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1806 ret <4 x i64> %res 1807 } 1808 1809 define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1810 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1: 1811 ; GENERIC: # %bb.0: 1812 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1813 ; GENERIC-NEXT: vpermq 
{{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [8:1.00] 1814 ; GENERIC-NEXT: retq # sched: [1:1.00] 1815 ; 1816 ; SKX-LABEL: test_masked_4xi64_perm_mem_mask1: 1817 ; SKX: # %bb.0: 1818 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1819 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [10:1.00] 1820 ; SKX-NEXT: retq # sched: [7:1.00] 1821 %vec = load <4 x i64>, <4 x i64>* %vp 1822 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 1823 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1824 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1825 ret <4 x i64> %res 1826 } 1827 1828 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) { 1829 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1: 1830 ; GENERIC: # %bb.0: 1831 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 1832 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [8:1.00] 1833 ; GENERIC-NEXT: retq # sched: [1:1.00] 1834 ; 1835 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1: 1836 ; SKX: # %bb.0: 1837 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1838 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [10:1.00] 1839 ; SKX-NEXT: retq # sched: [7:1.00] 1840 %vec = load <4 x i64>, <4 x i64>* %vp 1841 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 1842 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1843 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1844 ret <4 x i64> %res 1845 } 1846 1847 define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1848 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2: 1849 ; GENERIC: # %bb.0: 1850 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1851 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [8:1.00] 1852 ; GENERIC-NEXT: retq # sched: [1:1.00] 1853 ; 1854 ; SKX-LABEL: 
test_masked_4xi64_perm_mem_mask2: 1855 ; SKX: # %bb.0: 1856 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1857 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [10:1.00] 1858 ; SKX-NEXT: retq # sched: [7:1.00] 1859 %vec = load <4 x i64>, <4 x i64>* %vp 1860 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0> 1861 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1862 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1863 ret <4 x i64> %res 1864 } 1865 1866 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) { 1867 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2: 1868 ; GENERIC: # %bb.0: 1869 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 1870 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [8:1.00] 1871 ; GENERIC-NEXT: retq # sched: [1:1.00] 1872 ; 1873 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2: 1874 ; SKX: # %bb.0: 1875 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1876 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [10:1.00] 1877 ; SKX-NEXT: retq # sched: [7:1.00] 1878 %vec = load <4 x i64>, <4 x i64>* %vp 1879 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0> 1880 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1881 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1882 ret <4 x i64> %res 1883 } 1884 1885 define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { 1886 ; GENERIC-LABEL: test_4xi64_perm_mem_mask3: 1887 ; GENERIC: # %bb.0: 1888 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [8:1.00] 1889 ; GENERIC-NEXT: retq # sched: [1:1.00] 1890 ; 1891 ; SKX-LABEL: test_4xi64_perm_mem_mask3: 1892 ; SKX: # %bb.0: 1893 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [10:1.00] 1894 ; SKX-NEXT: retq # sched: [7:1.00] 1895 %vec = load <4 x i64>, <4 x i64>* %vp 1896 %res = shufflevector <4 x i64> 
%vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1897 ret <4 x i64> %res 1898 } 1899 define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { 1900 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3: 1901 ; GENERIC: # %bb.0: 1902 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 1903 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [8:1.00] 1904 ; GENERIC-NEXT: retq # sched: [1:1.00] 1905 ; 1906 ; SKX-LABEL: test_masked_4xi64_perm_mem_mask3: 1907 ; SKX: # %bb.0: 1908 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 1909 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [10:1.00] 1910 ; SKX-NEXT: retq # sched: [7:1.00] 1911 %vec = load <4 x i64>, <4 x i64>* %vp 1912 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1913 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1914 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 1915 ret <4 x i64> %res 1916 } 1917 1918 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) { 1919 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3: 1920 ; GENERIC: # %bb.0: 1921 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 1922 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [8:1.00] 1923 ; GENERIC-NEXT: retq # sched: [1:1.00] 1924 ; 1925 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3: 1926 ; SKX: # %bb.0: 1927 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 1928 ; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [10:1.00] 1929 ; SKX-NEXT: retq # sched: [7:1.00] 1930 %vec = load <4 x i64>, <4 x i64>* %vp 1931 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3> 1932 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 1933 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 1934 ret <4 x i64> %res 1935 } 1936 1937 define <8 x i64> 
@test_8xi64_perm_mask0(<8 x i64> %vec) { 1938 ; GENERIC-LABEL: test_8xi64_perm_mask0: 1939 ; GENERIC: # %bb.0: 1940 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [7:0.50] 1941 ; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 1942 ; GENERIC-NEXT: retq # sched: [1:1.00] 1943 ; 1944 ; SKX-LABEL: test_8xi64_perm_mask0: 1945 ; SKX: # %bb.0: 1946 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [8:0.50] 1947 ; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 1948 ; SKX-NEXT: retq # sched: [7:1.00] 1949 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1950 ret <8 x i64> %res 1951 } 1952 define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1953 ; GENERIC-LABEL: test_masked_8xi64_perm_mask0: 1954 ; GENERIC: # %bb.0: 1955 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [7:0.50] 1956 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 1957 ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 1958 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1959 ; GENERIC-NEXT: retq # sched: [1:1.00] 1960 ; 1961 ; SKX-LABEL: test_masked_8xi64_perm_mask0: 1962 ; SKX: # %bb.0: 1963 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [8:0.50] 1964 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 1965 ; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 1966 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 1967 ; SKX-NEXT: retq # sched: [7:1.00] 1968 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1969 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1970 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 1971 ret <8 x i64> %res 1972 } 1973 1974 define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask) { 1975 ; GENERIC-LABEL: 
test_masked_z_8xi64_perm_mask0: 1976 ; GENERIC: # %bb.0: 1977 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [7:0.50] 1978 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 1979 ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 1980 ; GENERIC-NEXT: retq # sched: [1:1.00] 1981 ; 1982 ; SKX-LABEL: test_masked_z_8xi64_perm_mask0: 1983 ; SKX: # %bb.0: 1984 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [8:0.50] 1985 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 1986 ; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 1987 ; SKX-NEXT: retq # sched: [7:1.00] 1988 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6> 1989 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1990 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1991 ret <8 x i64> %res 1992 } 1993 define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { 1994 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1: 1995 ; GENERIC: # %bb.0: 1996 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 1997 ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] 1998 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 1999 ; GENERIC-NEXT: retq # sched: [1:1.00] 2000 ; 2001 ; SKX-LABEL: test_masked_8xi64_perm_imm_mask1: 2002 ; SKX: # %bb.0: 2003 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 2004 ; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00] 2005 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 2006 ; SKX-NEXT: retq # sched: [7:1.00] 2007 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5> 2008 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2009 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2010 ret <8 x i64> %res 2011 } 
; Zero-masked permute of %vec with indices <1,0,1,1,5,4,5,5>: a lane keeps the
; permuted element where the matching %mask element equals zero, otherwise it
; becomes zero.
define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Merge-masked permute of %vec with indices <1,3,7,3,3,5,4,1>: lanes whose
; %mask element equals zero take the permuted element, the rest come from %vec2.
define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_8xi64_perm_mask2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mask2:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> %vec2
  ret <8 x i64> %out
}

; Zero-masked form of the <1,3,7,3,3,5,4,1> permute: lanes whose %mask element
; is non-zero are replaced by zero.
define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mask2:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Unmasked permute of %vec with indices <3,1,3,1,7,5,7,5>.
define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) {
; GENERIC-LABEL: test_8xi64_perm_imm_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_perm_imm_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %out = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
  ret <8 x i64> %out
}

; Merge-masked form of the <3,1,3,1,7,5,7,5> permute: lanes whose %mask element
; equals zero take the permuted element, the rest come from %vec2.
define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_imm_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00]
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> %vec2
  ret <8 x i64> %out
}

; Zero-masked form of the <3,1,3,1,7,5,7,5> permute: lanes whose %mask element
; is non-zero are replaced by zero.
define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Merge-masked permute of %vec with indices <6,3,1,1,7,4,0,3>: lanes whose
; %mask element equals zero take the permuted element, the rest come from %vec2.
define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_8xi64_perm_mask4:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mask4:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> %vec2
  ret <8 x i64> %out
}

; Zero-masked form of the <6,3,1,1,7,4,0,3> permute: lanes whose %mask element
; is non-zero are replaced by zero.
define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mask4:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Merge-masked permute of %vec with indices <0,0,0,0,4,4,4,4>: lanes whose
; %mask element equals zero take the permuted element, the rest come from %vec2.
define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_imm_mask5:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00]
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> %vec2
  ret <8 x i64> %out
}

; Zero-masked form of the <0,0,0,0,4,4,4,4> permute: lanes whose %mask element
; is non-zero are replaced by zero.
define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask5:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Unmasked permute of %vec with indices <5,1,4,4,5,4,2,7>.
define <8 x i64> @test_8xi64_perm_mask6(<8 x i64> %vec) {
; GENERIC-LABEL: test_8xi64_perm_mask6:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [7:0.50]
; GENERIC-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_perm_mask6:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [8:0.50]
; SKX-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %out = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
  ret <8 x i64> %out
}

; Merge-masked form of the <5,1,4,4,5,4,2,7> permute: lanes whose %mask element
; equals zero take the permuted element, the rest come from %vec2.
define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_8xi64_perm_mask6:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mask6:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> %vec2
  ret <8 x i64> %out
}

; Zero-masked form of the <5,1,4,4,5,4,2,7> permute: lanes whose %mask element
; is non-zero are replaced by zero.
define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mask6:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Merge-masked permute of %vec with indices <3,3,3,3,7,7,7,7>: lanes whose
; %mask element equals zero take the permuted element, the rest come from %vec2.
define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_imm_mask7:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00]
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> %vec2
  ret <8 x i64> %out
}

; Zero-masked form of the <3,3,3,3,7,7,7,7> permute: lanes whose %mask element
; is non-zero are replaced by zero.
define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask7:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %perm = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; Unmasked permute with indices <5,1,6,5,7,3,7,3> whose source vector is loaded
; from %vp.
define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) {
; GENERIC-LABEL: test_8xi64_perm_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [7:0.50]
; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_perm_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [8:0.50]
; SKX-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %src = load <8 x i64>, <8 x i64>* %vp
  %out = shufflevector <8 x i64> %src, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
  ret <8 x i64> %out
}

; Merge-masked form of the <5,1,6,5,7,3,7,3> permute with the source loaded
; from %vp: lanes whose %mask element equals zero take the permuted element,
; the rest come from %vec2.
define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %src = load <8 x i64>, <8 x i64>* %vp
  %perm = shufflevector <8 x i64> %src, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> %vec2
  ret <8 x i64> %out
}

; Zero-masked form of the <5,1,6,5,7,3,7,3> permute with the source loaded
; from %vp: lanes whose %mask element is non-zero are replaced by zero.
define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [7:0.50]
; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [8:0.50]
; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %src = load <8 x i64>, <8 x i64>* %vp
  %perm = shufflevector <8 x i64> %src, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
  %mask_is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %mask_is_zero, <8 x i64> %perm, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}

; ---------------------------------------------------------------------------
; 512-bit vpermq tests whose source vector is loaded from memory (%vp).
; The element type is <8 x i64> throughout this group.
;
; Two shuffle shapes are exercised:
;   *_perm_imm_mem_mask{1,3} - the upper four shuffle indices repeat the lower
;                              four offset by 4 (e.g. 1,1,1,0 / 5,5,5,4); the
;                              expected assembly is the "mem[...]" form with no
;                              index register.
;   *_perm_mem_mask{2,4}     - the indices have no such repetition; the expected
;                              assembly first loads an index vector with
;                              vmovdqa64 and then issues
;                              vpermq (%rdi), %zmmN, %zmm0.
;
; Three variants per shape:
;   test_<...>           - plain shuffle of the loaded vector. For
;                          test_8xi64_perm_imm_mem_mask3 the expected
;                          instruction is vpermpd, not vpermq.
;   test_masked_<...>    - lanes where %mask == 0 take the shuffle result, all
;                          other lanes take %vec2 (merge masking, {%k1}).
;   test_masked_z_<...>  - lanes where %mask == 0 take the shuffle result, all
;                          other lanes are zero (zero masking, {%k1} {z}).
; In both masked variants the expected code derives %k1 with vptestnmq.
;
; The check lines are generated by utils/update_llc_test_checks.py for the two
; RUN configurations at the top of the file (prefixes GENERIC and SKX);
; regenerate them with that script instead of editing them by hand.
; NOTE(review): the "sched: [a:b]" suffixes are emitted because of
; -print-schedule and are presumably latency:reciprocal-throughput - confirm
; against the scheduler model before relying on the numbers.
; ---------------------------------------------------------------------------
define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2351 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: 2352 ; GENERIC: # %bb.0: 2353 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2354 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] 2355 ; GENERIC-NEXT: retq # sched: [1:1.00] 2356 ; 2357 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1: 2358 ; SKX: # %bb.0: 2359 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2360 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00] 2361 ; SKX-NEXT: retq # sched: [7:1.00] 2362 %vec = load <8 x i64>, <8 x i64>* %vp 2363 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4> 2364 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2365 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2366 ret <8 x i64> %res 2367 } 2368 2369 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) { 2370 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: 2371 ; GENERIC: # %bb.0: 2372 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2373 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] 2374 ; GENERIC-NEXT: retq # sched: [1:1.00] 2375 ; 2376 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: 2377 ; SKX: # %bb.0: 2378 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2379 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00] 2380 ; SKX-NEXT: retq # sched: [7:1.00] 2381 %vec = load <8 x i64>, <8 x i64>* %vp 2382 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4> 2383 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2384 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2385 ret <8 x i64> %res 2386 } 2387 2388 define 
<8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2389 ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2: 2390 ; GENERIC: # %bb.0: 2391 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [7:0.50] 2392 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2393 ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 2394 ; GENERIC-NEXT: retq # sched: [1:1.00] 2395 ; 2396 ; SKX-LABEL: test_masked_8xi64_perm_mem_mask2: 2397 ; SKX: # %bb.0: 2398 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [8:0.50] 2399 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2400 ; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 2401 ; SKX-NEXT: retq # sched: [7:1.00] 2402 %vec = load <8 x i64>, <8 x i64>* %vp 2403 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5> 2404 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2405 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2406 ret <8 x i64> %res 2407 } 2408 2409 define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %mask) { 2410 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2: 2411 ; GENERIC: # %bb.0: 2412 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [7:0.50] 2413 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2414 ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 2415 ; GENERIC-NEXT: retq # sched: [1:1.00] 2416 ; 2417 ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2: 2418 ; SKX: # %bb.0: 2419 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [8:0.50] 2420 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2421 ; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 2422 ; SKX-NEXT: retq # sched: [7:1.00] 2423 %vec = load <8 x i64>, <8 x i64>* %vp 2424 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, 
i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5> 2425 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2426 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2427 ret <8 x i64> %res 2428 } 2429 2430 define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { 2431 ; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3: 2432 ; GENERIC: # %bb.0: 2433 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] 2434 ; GENERIC-NEXT: retq # sched: [1:1.00] 2435 ; 2436 ; SKX-LABEL: test_8xi64_perm_imm_mem_mask3: 2437 ; SKX: # %bb.0: 2438 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] 2439 ; SKX-NEXT: retq # sched: [7:1.00] 2440 %vec = load <8 x i64>, <8 x i64>* %vp 2441 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 2442 ret <8 x i64> %res 2443 } 2444 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2445 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: 2446 ; GENERIC: # %bb.0: 2447 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2448 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] 2449 ; GENERIC-NEXT: retq # sched: [1:1.00] 2450 ; 2451 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3: 2452 ; SKX: # %bb.0: 2453 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2454 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] 2455 ; SKX-NEXT: retq # sched: [7:1.00] 2456 %vec = load <8 x i64>, <8 x i64>* %vp 2457 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 2458 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2459 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2460 ret <8 x i64> %res 2461 } 2462 2463 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) { 2464 ; GENERIC-LABEL: 
test_masked_z_8xi64_perm_imm_mem_mask3: 2465 ; GENERIC: # %bb.0: 2466 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2467 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] 2468 ; GENERIC-NEXT: retq # sched: [1:1.00] 2469 ; 2470 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: 2471 ; SKX: # %bb.0: 2472 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2473 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] 2474 ; SKX-NEXT: retq # sched: [7:1.00] 2475 %vec = load <8 x i64>, <8 x i64>* %vp 2476 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5> 2477 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2478 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2479 ret <8 x i64> %res 2480 } 2481 2482 define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2483 ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4: 2484 ; GENERIC: # %bb.0: 2485 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [7:0.50] 2486 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2487 ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 2488 ; GENERIC-NEXT: retq # sched: [1:1.00] 2489 ; 2490 ; SKX-LABEL: test_masked_8xi64_perm_mem_mask4: 2491 ; SKX: # %bb.0: 2492 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [8:0.50] 2493 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2494 ; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 2495 ; SKX-NEXT: retq # sched: [7:1.00] 2496 %vec = load <8 x i64>, <8 x i64>* %vp 2497 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6> 2498 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2499 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2500 ret <8 x i64> %res 2501 } 2502 2503 
define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %mask) { 2504 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4: 2505 ; GENERIC: # %bb.0: 2506 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [7:0.50] 2507 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2508 ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 2509 ; GENERIC-NEXT: retq # sched: [1:1.00] 2510 ; 2511 ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask4: 2512 ; SKX: # %bb.0: 2513 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [8:0.50] 2514 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2515 ; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 2516 ; SKX-NEXT: retq # sched: [7:1.00] 2517 %vec = load <8 x i64>, <8 x i64>* %vp 2518 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6> 2519 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2520 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2521 ret <8 x i64> %res 2522 } 2523 2524 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2525 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: 2526 ; GENERIC: # %bb.0: 2527 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2528 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] 2529 ; GENERIC-NEXT: retq # sched: [1:1.00] 2530 ; 2531 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5: 2532 ; SKX: # %bb.0: 2533 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2534 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00] 2535 ; SKX-NEXT: retq # sched: [7:1.00] 2536 %vec = load <8 x i64>, <8 x i64>* %vp 2537 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4> 2538 %cmp = icmp eq <8 x i64> %mask, 
zeroinitializer 2539 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2540 ret <8 x i64> %res 2541 } 2542 2543 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) { 2544 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: 2545 ; GENERIC: # %bb.0: 2546 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2547 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] 2548 ; GENERIC-NEXT: retq # sched: [1:1.00] 2549 ; 2550 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: 2551 ; SKX: # %bb.0: 2552 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2553 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00] 2554 ; SKX-NEXT: retq # sched: [7:1.00] 2555 %vec = load <8 x i64>, <8 x i64>* %vp 2556 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4> 2557 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2558 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2559 ret <8 x i64> %res 2560 } 2561 2562 define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) { 2563 ; GENERIC-LABEL: test_8xi64_perm_mem_mask6: 2564 ; GENERIC: # %bb.0: 2565 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [7:0.50] 2566 ; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 2567 ; GENERIC-NEXT: retq # sched: [1:1.00] 2568 ; 2569 ; SKX-LABEL: test_8xi64_perm_mem_mask6: 2570 ; SKX: # %bb.0: 2571 ; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [8:0.50] 2572 ; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 2573 ; SKX-NEXT: retq # sched: [7:1.00] 2574 %vec = load <8 x i64>, <8 x i64>* %vp 2575 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 2576 ret <8 x i64> %res 2577 } 2578 define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %vec2, <8 x 
i64> %mask) { 2579 ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6: 2580 ; GENERIC: # %bb.0: 2581 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [7:0.50] 2582 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2583 ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 2584 ; GENERIC-NEXT: retq # sched: [1:1.00] 2585 ; 2586 ; SKX-LABEL: test_masked_8xi64_perm_mem_mask6: 2587 ; SKX: # %bb.0: 2588 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [8:0.50] 2589 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2590 ; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 2591 ; SKX-NEXT: retq # sched: [7:1.00] 2592 %vec = load <8 x i64>, <8 x i64>* %vp 2593 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 2594 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2595 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2596 ret <8 x i64> %res 2597 } 2598 2599 define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %mask) { 2600 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6: 2601 ; GENERIC: # %bb.0: 2602 ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [7:0.50] 2603 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2604 ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 2605 ; GENERIC-NEXT: retq # sched: [1:1.00] 2606 ; 2607 ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6: 2608 ; SKX: # %bb.0: 2609 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [8:0.50] 2610 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2611 ; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 2612 ; SKX-NEXT: retq # sched: [7:1.00] 2613 %vec = load <8 x i64>, <8 x i64>* %vp 2614 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6> 2615 %cmp = icmp eq <8 x i64> 
%mask, zeroinitializer 2616 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2617 ret <8 x i64> %res 2618 } 2619 2620 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { 2621 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: 2622 ; GENERIC: # %bb.0: 2623 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2624 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] 2625 ; GENERIC-NEXT: retq # sched: [1:1.00] 2626 ; 2627 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7: 2628 ; SKX: # %bb.0: 2629 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2630 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] 2631 ; SKX-NEXT: retq # sched: [7:1.00] 2632 %vec = load <8 x i64>, <8 x i64>* %vp 2633 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5> 2634 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2635 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 2636 ret <8 x i64> %res 2637 } 2638 2639 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) { 2640 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: 2641 ; GENERIC: # %bb.0: 2642 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 2643 ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] 2644 ; GENERIC-NEXT: retq # sched: [1:1.00] 2645 ; 2646 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: 2647 ; SKX: # %bb.0: 2648 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 2649 ; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] 2650 ; SKX-NEXT: retq # sched: [7:1.00] 2651 %vec = load <8 x i64>, <8 x i64>* %vp 2652 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5> 2653 %cmp = icmp eq <8 x i64> %mask, 
zeroinitializer 2654 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 2655 ret <8 x i64> %res 2656 } 2657 2658 define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) { 2659 ; GENERIC-LABEL: test_8xfloat_perm_mask0: 2660 ; GENERIC: # %bb.0: 2661 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2662 ; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 2663 ; GENERIC-NEXT: retq # sched: [1:1.00] 2664 ; 2665 ; SKX-LABEL: test_8xfloat_perm_mask0: 2666 ; SKX: # %bb.0: 2667 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2668 ; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2669 ; SKX-NEXT: retq # sched: [7:1.00] 2670 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 2671 ret <8 x float> %res 2672 } 2673 define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2674 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask0: 2675 ; GENERIC: # %bb.0: 2676 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2677 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2678 ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2679 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2680 ; GENERIC-NEXT: retq # sched: [1:1.00] 2681 ; 2682 ; SKX-LABEL: test_masked_8xfloat_perm_mask0: 2683 ; SKX: # %bb.0: 2684 ; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2685 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 2686 ; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2687 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2688 ; SKX-NEXT: retq # sched: [7:1.00] 2689 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 2690 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2691 %res = select <8 x i1> %cmp, <8 x float> 
%shuf, <8 x float> %vec2 2692 ret <8 x float> %res 2693 } 2694 2695 define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x i32> %mask) { 2696 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask0: 2697 ; GENERIC: # %bb.0: 2698 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2699 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2700 ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2701 ; GENERIC-NEXT: retq # sched: [1:1.00] 2702 ; 2703 ; SKX-LABEL: test_masked_z_8xfloat_perm_mask0: 2704 ; SKX: # %bb.0: 2705 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] 2706 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2707 ; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2708 ; SKX-NEXT: retq # sched: [7:1.00] 2709 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4> 2710 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2711 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2712 ret <8 x float> %res 2713 } 2714 define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2715 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask1: 2716 ; GENERIC: # %bb.0: 2717 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2718 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2719 ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2720 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2721 ; GENERIC-NEXT: retq # sched: [1:1.00] 2722 ; 2723 ; SKX-LABEL: test_masked_8xfloat_perm_mask1: 2724 ; SKX: # %bb.0: 2725 ; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2726 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 2727 ; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2728 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2729 ; 
SKX-NEXT: retq # sched: [7:1.00] 2730 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1> 2731 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2732 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2733 ret <8 x float> %res 2734 } 2735 2736 define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x i64> %mask) { 2737 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1: 2738 ; GENERIC: # %bb.0: 2739 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2740 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 2741 ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2742 ; GENERIC-NEXT: retq # sched: [1:1.00] 2743 ; 2744 ; SKX-LABEL: test_masked_z_8xfloat_perm_mask1: 2745 ; SKX: # %bb.0: 2746 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] 2747 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 2748 ; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2749 ; SKX-NEXT: retq # sched: [7:1.00] 2750 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1> 2751 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 2752 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2753 ret <8 x float> %res 2754 } 2755 define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2756 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask2: 2757 ; GENERIC: # %bb.0: 2758 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2759 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2760 ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2761 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2762 ; GENERIC-NEXT: retq # sched: [1:1.00] 2763 ; 2764 ; SKX-LABEL: test_masked_8xfloat_perm_mask2: 2765 ; SKX: # %bb.0: 2766 ; 
SKX-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2767 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 2768 ; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2769 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2770 ; SKX-NEXT: retq # sched: [7:1.00] 2771 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5> 2772 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2773 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2774 ret <8 x float> %res 2775 } 2776 2777 define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x i32> %mask) { 2778 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask2: 2779 ; GENERIC: # %bb.0: 2780 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2781 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2782 ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2783 ; GENERIC-NEXT: retq # sched: [1:1.00] 2784 ; 2785 ; SKX-LABEL: test_masked_z_8xfloat_perm_mask2: 2786 ; SKX: # %bb.0: 2787 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] 2788 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2789 ; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2790 ; SKX-NEXT: retq # sched: [7:1.00] 2791 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5> 2792 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2793 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2794 ret <8 x float> %res 2795 } 2796 define <8 x float> @test_8xfloat_perm_mask3(<8 x float> %vec) { 2797 ; GENERIC-LABEL: test_8xfloat_perm_mask3: 2798 ; GENERIC: # %bb.0: 2799 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2800 ; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 2801 ; GENERIC-NEXT: retq # sched: [1:1.00] 2802 ; 
2803 ; SKX-LABEL: test_8xfloat_perm_mask3: 2804 ; SKX: # %bb.0: 2805 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2806 ; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2807 ; SKX-NEXT: retq # sched: [7:1.00] 2808 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6> 2809 ret <8 x float> %res 2810 } 2811 define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { 2812 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask3: 2813 ; GENERIC: # %bb.0: 2814 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2815 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 2816 ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] 2817 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 2818 ; GENERIC-NEXT: retq # sched: [1:1.00] 2819 ; 2820 ; SKX-LABEL: test_masked_8xfloat_perm_mask3: 2821 ; SKX: # %bb.0: 2822 ; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2823 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 2824 ; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] 2825 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 2826 ; SKX-NEXT: retq # sched: [7:1.00] 2827 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6> 2828 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2829 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2830 ret <8 x float> %res 2831 } 2832 2833 define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x i32> %mask) { 2834 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask3: 2835 ; GENERIC: # %bb.0: 2836 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2837 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2838 ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] 2839 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 2840 ; 2841 ; SKX-LABEL: test_masked_z_8xfloat_perm_mask3: 2842 ; SKX: # %bb.0: 2843 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] 2844 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2845 ; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] 2846 ; SKX-NEXT: retq # sched: [7:1.00] 2847 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6> 2848 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2849 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2850 ret <8 x float> %res 2851 } 2852 define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) { 2853 ; GENERIC-LABEL: test_8xfloat_perm_mem_mask0: 2854 ; GENERIC: # %bb.0: 2855 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2856 ; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 2857 ; GENERIC-NEXT: retq # sched: [1:1.00] 2858 ; 2859 ; SKX-LABEL: test_8xfloat_perm_mem_mask0: 2860 ; SKX: # %bb.0: 2861 ; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2862 ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2863 ; SKX-NEXT: retq # sched: [7:1.00] 2864 %vec = load <8 x float>, <8 x float>* %vp 2865 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0> 2866 ret <8 x float> %res 2867 } 2868 define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 2869 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask0: 2870 ; GENERIC: # %bb.0: 2871 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2872 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2873 ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 2874 ; GENERIC-NEXT: retq # sched: [1:1.00] 2875 ; 2876 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask0: 2877 
; SKX: # %bb.0: 2878 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2879 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2880 ; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 2881 ; SKX-NEXT: retq # sched: [7:1.00] 2882 %vec = load <8 x float>, <8 x float>* %vp 2883 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0> 2884 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2885 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2886 ret <8 x float> %res 2887 } 2888 2889 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x i32> %mask) { 2890 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask0: 2891 ; GENERIC: # %bb.0: 2892 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2893 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 2894 ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 2895 ; GENERIC-NEXT: retq # sched: [1:1.00] 2896 ; 2897 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0: 2898 ; SKX: # %bb.0: 2899 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] 2900 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 2901 ; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 2902 ; SKX-NEXT: retq # sched: [7:1.00] 2903 %vec = load <8 x float>, <8 x float>* %vp 2904 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0> 2905 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2906 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2907 ret <8 x float> %res 2908 } 2909 2910 define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 2911 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask1: 2912 ; GENERIC: # %bb.0: 2913 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = 
[1,3,7,4,0,6,6,6] sched: [7:0.50] 2914 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2915 ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 2916 ; GENERIC-NEXT: retq # sched: [1:1.00] 2917 ; 2918 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1: 2919 ; SKX: # %bb.0: 2920 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] 2921 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2922 ; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 2923 ; SKX-NEXT: retq # sched: [7:1.00] 2924 %vec = load <8 x float>, <8 x float>* %vp 2925 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6> 2926 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2927 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2928 ret <8 x float> %res 2929 } 2930 2931 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x i32> %mask) { 2932 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask1: 2933 ; GENERIC: # %bb.0: 2934 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] 2935 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 2936 ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 2937 ; GENERIC-NEXT: retq # sched: [1:1.00] 2938 ; 2939 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1: 2940 ; SKX: # %bb.0: 2941 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] 2942 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 2943 ; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 2944 ; SKX-NEXT: retq # sched: [7:1.00] 2945 %vec = load <8 x float>, <8 x float>* %vp 2946 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6> 2947 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2948 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2949 ret 
<8 x float> %res 2950 } 2951 2952 define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 2953 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask2: 2954 ; GENERIC: # %bb.0: 2955 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2956 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 2957 ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 2958 ; GENERIC-NEXT: retq # sched: [1:1.00] 2959 ; 2960 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2: 2961 ; SKX: # %bb.0: 2962 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2963 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 2964 ; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 2965 ; SKX-NEXT: retq # sched: [7:1.00] 2966 %vec = load <8 x float>, <8 x float>* %vp 2967 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4> 2968 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2969 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 2970 ret <8 x float> %res 2971 } 2972 2973 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x i32> %mask) { 2974 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask2: 2975 ; GENERIC: # %bb.0: 2976 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2977 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 2978 ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 2979 ; GENERIC-NEXT: retq # sched: [1:1.00] 2980 ; 2981 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2: 2982 ; SKX: # %bb.0: 2983 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] 2984 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 2985 ; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 2986 ; SKX-NEXT: retq # sched: [7:1.00] 2987 %vec = load <8 x float>, <8 x float>* %vp 
2988 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4> 2989 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 2990 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 2991 ret <8 x float> %res 2992 } 2993 2994 define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) { 2995 ; GENERIC-LABEL: test_8xfloat_perm_mem_mask3: 2996 ; GENERIC: # %bb.0: 2997 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 2998 ; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 2999 ; GENERIC-NEXT: retq # sched: [1:1.00] 3000 ; 3001 ; SKX-LABEL: test_8xfloat_perm_mem_mask3: 3002 ; SKX: # %bb.0: 3003 ; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3004 ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 3005 ; SKX-NEXT: retq # sched: [7:1.00] 3006 %vec = load <8 x float>, <8 x float>* %vp 3007 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0> 3008 ret <8 x float> %res 3009 } 3010 define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { 3011 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask3: 3012 ; GENERIC: # %bb.0: 3013 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3014 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 3015 ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] 3016 ; GENERIC-NEXT: retq # sched: [1:1.00] 3017 ; 3018 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3: 3019 ; SKX: # %bb.0: 3020 ; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3021 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 3022 ; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] 3023 ; SKX-NEXT: retq # sched: [7:1.00] 3024 %vec = load <8 x float>, <8 x float>* %vp 3025 %shuf = shufflevector <8 x 
float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0> 3026 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 3027 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 3028 ret <8 x float> %res 3029 } 3030 3031 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) { 3032 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask3: 3033 ; GENERIC: # %bb.0: 3034 ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3035 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 3036 ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] 3037 ; GENERIC-NEXT: retq # sched: [1:1.00] 3038 ; 3039 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3: 3040 ; SKX: # %bb.0: 3041 ; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] 3042 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 3043 ; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] 3044 ; SKX-NEXT: retq # sched: [7:1.00] 3045 %vec = load <8 x float>, <8 x float>* %vp 3046 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0> 3047 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 3048 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 3049 ret <8 x float> %res 3050 } 3051 3052 define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) { 3053 ; GENERIC-LABEL: test_16xfloat_perm_mask0: 3054 ; GENERIC: # %bb.0: 3055 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] 3056 ; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 3057 ; GENERIC-NEXT: retq # sched: [1:1.00] 3058 ; 3059 ; SKX-LABEL: test_16xfloat_perm_mask0: 3060 ; SKX: # %bb.0: 3061 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] 3062 ; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 3063 ; 
SKX-NEXT: retq # sched: [7:1.00] 3064 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7> 3065 ret <16 x float> %res 3066 } 3067 define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3068 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask0: 3069 ; GENERIC: # %bb.0: 3070 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] 3071 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3072 ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3073 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3074 ; GENERIC-NEXT: retq # sched: [1:1.00] 3075 ; 3076 ; SKX-LABEL: test_masked_16xfloat_perm_mask0: 3077 ; SKX: # %bb.0: 3078 ; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] 3079 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3080 ; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3081 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 3082 ; SKX-NEXT: retq # sched: [7:1.00] 3083 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7> 3084 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3085 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3086 ret <16 x float> %res 3087 } 3088 3089 define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x i32> %mask) { 3090 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0: 3091 ; GENERIC: # %bb.0: 3092 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] 3093 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3094 ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: 
[1:1.00] 3095 ; GENERIC-NEXT: retq # sched: [1:1.00] 3096 ; 3097 ; SKX-LABEL: test_masked_z_16xfloat_perm_mask0: 3098 ; SKX: # %bb.0: 3099 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] 3100 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3101 ; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3102 ; SKX-NEXT: retq # sched: [7:1.00] 3103 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7> 3104 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3105 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3106 ret <16 x float> %res 3107 } 3108 define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3109 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask1: 3110 ; GENERIC: # %bb.0: 3111 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] 3112 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3113 ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3114 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3115 ; GENERIC-NEXT: retq # sched: [1:1.00] 3116 ; 3117 ; SKX-LABEL: test_masked_16xfloat_perm_mask1: 3118 ; SKX: # %bb.0: 3119 ; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] 3120 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3121 ; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3122 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 3123 ; SKX-NEXT: retq # sched: [7:1.00] 3124 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1> 3125 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3126 %res = select <16 x 
i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3127 ret <16 x float> %res 3128 } 3129 3130 define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x i32> %mask) { 3131 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1: 3132 ; GENERIC: # %bb.0: 3133 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] 3134 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3135 ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3136 ; GENERIC-NEXT: retq # sched: [1:1.00] 3137 ; 3138 ; SKX-LABEL: test_masked_z_16xfloat_perm_mask1: 3139 ; SKX: # %bb.0: 3140 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] 3141 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3142 ; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3143 ; SKX-NEXT: retq # sched: [7:1.00] 3144 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1> 3145 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3146 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3147 ret <16 x float> %res 3148 } 3149 define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3150 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask2: 3151 ; GENERIC: # %bb.0: 3152 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] 3153 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3154 ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3155 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3156 ; GENERIC-NEXT: retq # sched: [1:1.00] 3157 ; 3158 ; SKX-LABEL: test_masked_16xfloat_perm_mask2: 3159 ; SKX: # %bb.0: 3160 ; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] 
3161 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3162 ; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3163 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 3164 ; SKX-NEXT: retq # sched: [7:1.00] 3165 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11> 3166 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3167 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3168 ret <16 x float> %res 3169 } 3170 3171 define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x i32> %mask) { 3172 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2: 3173 ; GENERIC: # %bb.0: 3174 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] 3175 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3176 ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3177 ; GENERIC-NEXT: retq # sched: [1:1.00] 3178 ; 3179 ; SKX-LABEL: test_masked_z_16xfloat_perm_mask2: 3180 ; SKX: # %bb.0: 3181 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] 3182 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3183 ; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3184 ; SKX-NEXT: retq # sched: [7:1.00] 3185 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11> 3186 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3187 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3188 ret <16 x float> %res 3189 } 3190 define <16 x float> @test_16xfloat_perm_mask3(<16 x float> %vec) { 3191 ; GENERIC-LABEL: test_16xfloat_perm_mask3: 3192 ; GENERIC: # %bb.0: 3193 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = 
[10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] 3194 ; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 3195 ; GENERIC-NEXT: retq # sched: [1:1.00] 3196 ; 3197 ; SKX-LABEL: test_16xfloat_perm_mask3: 3198 ; SKX: # %bb.0: 3199 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] 3200 ; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 3201 ; SKX-NEXT: retq # sched: [7:1.00] 3202 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3> 3203 ret <16 x float> %res 3204 } 3205 define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { 3206 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask3: 3207 ; GENERIC: # %bb.0: 3208 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] 3209 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 3210 ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3211 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 3212 ; GENERIC-NEXT: retq # sched: [1:1.00] 3213 ; 3214 ; SKX-LABEL: test_masked_16xfloat_perm_mask3: 3215 ; SKX: # %bb.0: 3216 ; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] 3217 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 3218 ; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3219 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 3220 ; SKX-NEXT: retq # sched: [7:1.00] 3221 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3> 3222 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3223 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3224 ret <16 x float> %res 3225 } 3226 3227 define <16 x 
float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x i32> %mask) { 3228 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3: 3229 ; GENERIC: # %bb.0: 3230 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] 3231 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3232 ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3233 ; GENERIC-NEXT: retq # sched: [1:1.00] 3234 ; 3235 ; SKX-LABEL: test_masked_z_16xfloat_perm_mask3: 3236 ; SKX: # %bb.0: 3237 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] 3238 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3239 ; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3240 ; SKX-NEXT: retq # sched: [7:1.00] 3241 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3> 3242 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3243 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3244 ret <16 x float> %res 3245 } 3246 define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) { 3247 ; GENERIC-LABEL: test_16xfloat_perm_mem_mask0: 3248 ; GENERIC: # %bb.0: 3249 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] 3250 ; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 3251 ; GENERIC-NEXT: retq # sched: [1:1.00] 3252 ; 3253 ; SKX-LABEL: test_16xfloat_perm_mem_mask0: 3254 ; SKX: # %bb.0: 3255 ; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] 3256 ; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 3257 ; SKX-NEXT: retq # sched: [7:1.00] 3258 %vec = load <16 x float>, <16 x float>* %vp 3259 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, 
i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1> 3260 ret <16 x float> %res 3261 } 3262 define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3263 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0: 3264 ; GENERIC: # %bb.0: 3265 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] 3266 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3267 ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3268 ; GENERIC-NEXT: retq # sched: [1:1.00] 3269 ; 3270 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0: 3271 ; SKX: # %bb.0: 3272 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] 3273 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3274 ; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3275 ; SKX-NEXT: retq # sched: [7:1.00] 3276 %vec = load <16 x float>, <16 x float>* %vp 3277 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1> 3278 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3279 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3280 ret <16 x float> %res 3281 } 3282 3283 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x i32> %mask) { 3284 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0: 3285 ; GENERIC: # %bb.0: 3286 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] 3287 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3288 ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3289 ; GENERIC-NEXT: retq # sched: [1:1.00] 3290 ; 3291 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0: 3292 ; SKX: # %bb.0: 3293 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: 
[8:0.50] 3294 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3295 ; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3296 ; SKX-NEXT: retq # sched: [7:1.00] 3297 %vec = load <16 x float>, <16 x float>* %vp 3298 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1> 3299 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3300 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3301 ret <16 x float> %res 3302 } 3303 3304 define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3305 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1: 3306 ; GENERIC: # %bb.0: 3307 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] 3308 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3309 ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3310 ; GENERIC-NEXT: retq # sched: [1:1.00] 3311 ; 3312 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1: 3313 ; SKX: # %bb.0: 3314 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] 3315 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3316 ; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3317 ; SKX-NEXT: retq # sched: [7:1.00] 3318 %vec = load <16 x float>, <16 x float>* %vp 3319 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4> 3320 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3321 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3322 ret <16 x float> %res 3323 } 3324 3325 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x i32> %mask) { 3326 ; GENERIC-LABEL: 
test_masked_z_16xfloat_perm_mem_mask1: 3327 ; GENERIC: # %bb.0: 3328 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] 3329 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3330 ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3331 ; GENERIC-NEXT: retq # sched: [1:1.00] 3332 ; 3333 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1: 3334 ; SKX: # %bb.0: 3335 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] 3336 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3337 ; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3338 ; SKX-NEXT: retq # sched: [7:1.00] 3339 %vec = load <16 x float>, <16 x float>* %vp 3340 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4> 3341 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3342 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3343 ret <16 x float> %res 3344 } 3345 3346 define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3347 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2: 3348 ; GENERIC: # %bb.0: 3349 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] 3350 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3351 ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3352 ; GENERIC-NEXT: retq # sched: [1:1.00] 3353 ; 3354 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2: 3355 ; SKX: # %bb.0: 3356 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] 3357 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3358 ; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3359 ; SKX-NEXT: retq # sched: [7:1.00] 3360 %vec = load <16 x 
float>, <16 x float>* %vp 3361 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5> 3362 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3363 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3364 ret <16 x float> %res 3365 } 3366 3367 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x i32> %mask) { 3368 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2: 3369 ; GENERIC: # %bb.0: 3370 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] 3371 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3372 ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3373 ; GENERIC-NEXT: retq # sched: [1:1.00] 3374 ; 3375 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2: 3376 ; SKX: # %bb.0: 3377 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] 3378 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3379 ; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3380 ; SKX-NEXT: retq # sched: [7:1.00] 3381 %vec = load <16 x float>, <16 x float>* %vp 3382 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5> 3383 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3384 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3385 ret <16 x float> %res 3386 } 3387 3388 define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) { 3389 ; GENERIC-LABEL: test_16xfloat_perm_mem_mask3: 3390 ; GENERIC: # %bb.0: 3391 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] 3392 ; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 3393 ; GENERIC-NEXT: 
retq # sched: [1:1.00] 3394 ; 3395 ; SKX-LABEL: test_16xfloat_perm_mem_mask3: 3396 ; SKX: # %bb.0: 3397 ; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] 3398 ; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 3399 ; SKX-NEXT: retq # sched: [7:1.00] 3400 %vec = load <16 x float>, <16 x float>* %vp 3401 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0> 3402 ret <16 x float> %res 3403 } 3404 define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { 3405 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3: 3406 ; GENERIC: # %bb.0: 3407 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] 3408 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 3409 ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 3410 ; GENERIC-NEXT: retq # sched: [1:1.00] 3411 ; 3412 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3: 3413 ; SKX: # %bb.0: 3414 ; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] 3415 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 3416 ; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 3417 ; SKX-NEXT: retq # sched: [7:1.00] 3418 %vec = load <16 x float>, <16 x float>* %vp 3419 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0> 3420 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3421 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 3422 ret <16 x float> %res 3423 } 3424 3425 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x i32> %mask) { 3426 ; GENERIC-LABEL: 
test_masked_z_16xfloat_perm_mem_mask3: 3427 ; GENERIC: # %bb.0: 3428 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] 3429 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 3430 ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 3431 ; GENERIC-NEXT: retq # sched: [1:1.00] 3432 ; 3433 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3: 3434 ; SKX: # %bb.0: 3435 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] 3436 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 3437 ; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 3438 ; SKX-NEXT: retq # sched: [7:1.00] 3439 %vec = load <16 x float>, <16 x float>* %vp 3440 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0> 3441 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 3442 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 3443 ret <16 x float> %res 3444 } 3445 3446 define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) { 3447 ; GENERIC-LABEL: test_4xdouble_perm_mask0: 3448 ; GENERIC: # %bb.0: 3449 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [1:1.00] 3450 ; GENERIC-NEXT: retq # sched: [1:1.00] 3451 ; 3452 ; SKX-LABEL: test_4xdouble_perm_mask0: 3453 ; SKX: # %bb.0: 3454 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [3:1.00] 3455 ; SKX-NEXT: retq # sched: [7:1.00] 3456 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2> 3457 ret <4 x double> %res 3458 } 3459 define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3460 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask0: 3461 ; GENERIC: # %bb.0: 3462 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3463 ; 
GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00] 3464 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 3465 ; GENERIC-NEXT: retq # sched: [1:1.00] 3466 ; 3467 ; SKX-LABEL: test_masked_4xdouble_perm_mask0: 3468 ; SKX: # %bb.0: 3469 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3470 ; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [3:1.00] 3471 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3472 ; SKX-NEXT: retq # sched: [7:1.00] 3473 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2> 3474 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3475 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3476 ret <4 x double> %res 3477 } 3478 3479 define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i64> %mask) { 3480 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0: 3481 ; GENERIC: # %bb.0: 3482 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3483 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00] 3484 ; GENERIC-NEXT: retq # sched: [1:1.00] 3485 ; 3486 ; SKX-LABEL: test_masked_z_4xdouble_perm_mask0: 3487 ; SKX: # %bb.0: 3488 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3489 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [3:1.00] 3490 ; SKX-NEXT: retq # sched: [7:1.00] 3491 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2> 3492 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3493 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3494 ret <4 x double> %res 3495 } 3496 define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3497 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask1: 3498 ; GENERIC: # %bb.0: 3499 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3500 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = 
ymm0[3,0,0,0] sched: [1:1.00] 3501 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 3502 ; GENERIC-NEXT: retq # sched: [1:1.00] 3503 ; 3504 ; SKX-LABEL: test_masked_4xdouble_perm_mask1: 3505 ; SKX: # %bb.0: 3506 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3507 ; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [3:1.00] 3508 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3509 ; SKX-NEXT: retq # sched: [7:1.00] 3510 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 3511 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3512 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3513 ret <4 x double> %res 3514 } 3515 3516 define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i64> %mask) { 3517 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1: 3518 ; GENERIC: # %bb.0: 3519 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3520 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00] 3521 ; GENERIC-NEXT: retq # sched: [1:1.00] 3522 ; 3523 ; SKX-LABEL: test_masked_z_4xdouble_perm_mask1: 3524 ; SKX: # %bb.0: 3525 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3526 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [3:1.00] 3527 ; SKX-NEXT: retq # sched: [7:1.00] 3528 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 3529 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3530 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3531 ret <4 x double> %res 3532 } 3533 define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3534 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask2: 3535 ; GENERIC: # %bb.0: 3536 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3537 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00] 3538 ; GENERIC-NEXT: 
vmovapd %ymm1, %ymm0 # sched: [1:1.00] 3539 ; GENERIC-NEXT: retq # sched: [1:1.00] 3540 ; 3541 ; SKX-LABEL: test_masked_4xdouble_perm_mask2: 3542 ; SKX: # %bb.0: 3543 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3544 ; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [3:1.00] 3545 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3546 ; SKX-NEXT: retq # sched: [7:1.00] 3547 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1> 3548 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3549 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3550 ret <4 x double> %res 3551 } 3552 3553 define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x i64> %mask) { 3554 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2: 3555 ; GENERIC: # %bb.0: 3556 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3557 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00] 3558 ; GENERIC-NEXT: retq # sched: [1:1.00] 3559 ; 3560 ; SKX-LABEL: test_masked_z_4xdouble_perm_mask2: 3561 ; SKX: # %bb.0: 3562 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3563 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [3:1.00] 3564 ; SKX-NEXT: retq # sched: [7:1.00] 3565 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1> 3566 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3567 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3568 ret <4 x double> %res 3569 } 3570 define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) { 3571 ; GENERIC-LABEL: test_4xdouble_perm_mask3: 3572 ; GENERIC: # %bb.0: 3573 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [1:1.00] 3574 ; GENERIC-NEXT: retq # sched: [1:1.00] 3575 ; 3576 ; SKX-LABEL: test_4xdouble_perm_mask3: 3577 ; SKX: # %bb.0: 3578 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [3:1.00] 3579 ; 
SKX-NEXT: retq # sched: [7:1.00] 3580 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2> 3581 ret <4 x double> %res 3582 } 3583 define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { 3584 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask3: 3585 ; GENERIC: # %bb.0: 3586 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 3587 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00] 3588 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 3589 ; GENERIC-NEXT: retq # sched: [1:1.00] 3590 ; 3591 ; SKX-LABEL: test_masked_4xdouble_perm_mask3: 3592 ; SKX: # %bb.0: 3593 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 3594 ; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [3:1.00] 3595 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 3596 ; SKX-NEXT: retq # sched: [7:1.00] 3597 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2> 3598 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3599 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3600 ret <4 x double> %res 3601 } 3602 3603 define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i64> %mask) { 3604 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3: 3605 ; GENERIC: # %bb.0: 3606 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3607 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00] 3608 ; GENERIC-NEXT: retq # sched: [1:1.00] 3609 ; 3610 ; SKX-LABEL: test_masked_z_4xdouble_perm_mask3: 3611 ; SKX: # %bb.0: 3612 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3613 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [3:1.00] 3614 ; SKX-NEXT: retq # sched: [7:1.00] 3615 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2> 3616 %cmp = icmp eq <4 x i64> %mask, 
zeroinitializer 3617 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3618 ret <4 x double> %res 3619 } 3620 define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { 3621 ; GENERIC-LABEL: test_4xdouble_perm_mem_mask0: 3622 ; GENERIC: # %bb.0: 3623 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [8:1.00] 3624 ; GENERIC-NEXT: retq # sched: [1:1.00] 3625 ; 3626 ; SKX-LABEL: test_4xdouble_perm_mem_mask0: 3627 ; SKX: # %bb.0: 3628 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [10:1.00] 3629 ; SKX-NEXT: retq # sched: [7:1.00] 3630 %vec = load <4 x double>, <4 x double>* %vp 3631 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 3632 ret <4 x double> %res 3633 } 3634 define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3635 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0: 3636 ; GENERIC: # %bb.0: 3637 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3638 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [8:1.00] 3639 ; GENERIC-NEXT: retq # sched: [1:1.00] 3640 ; 3641 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0: 3642 ; SKX: # %bb.0: 3643 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3644 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [10:1.00] 3645 ; SKX-NEXT: retq # sched: [7:1.00] 3646 %vec = load <4 x double>, <4 x double>* %vp 3647 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 3648 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3649 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3650 ret <4 x double> %res 3651 } 3652 3653 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x i64> %mask) { 3654 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0: 3655 ; GENERIC: # %bb.0: 3656 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # 
sched: [1:0.33] 3657 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [8:1.00] 3658 ; GENERIC-NEXT: retq # sched: [1:1.00] 3659 ; 3660 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0: 3661 ; SKX: # %bb.0: 3662 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3663 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [10:1.00] 3664 ; SKX-NEXT: retq # sched: [7:1.00] 3665 %vec = load <4 x double>, <4 x double>* %vp 3666 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 3667 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3668 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3669 ret <4 x double> %res 3670 } 3671 3672 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3673 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1: 3674 ; GENERIC: # %bb.0: 3675 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3676 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [8:1.00] 3677 ; GENERIC-NEXT: retq # sched: [1:1.00] 3678 ; 3679 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1: 3680 ; SKX: # %bb.0: 3681 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3682 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [10:1.00] 3683 ; SKX-NEXT: retq # sched: [7:1.00] 3684 %vec = load <4 x double>, <4 x double>* %vp 3685 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2> 3686 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3687 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3688 ret <4 x double> %res 3689 } 3690 3691 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x i64> %mask) { 3692 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1: 3693 ; GENERIC: # %bb.0: 3694 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 3695 ; GENERIC-NEXT: 
vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [8:1.00] 3696 ; GENERIC-NEXT: retq # sched: [1:1.00] 3697 ; 3698 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1: 3699 ; SKX: # %bb.0: 3700 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3701 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [10:1.00] 3702 ; SKX-NEXT: retq # sched: [7:1.00] 3703 %vec = load <4 x double>, <4 x double>* %vp 3704 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2> 3705 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3706 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3707 ret <4 x double> %res 3708 } 3709 3710 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3711 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2: 3712 ; GENERIC: # %bb.0: 3713 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3714 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [8:1.00] 3715 ; GENERIC-NEXT: retq # sched: [1:1.00] 3716 ; 3717 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2: 3718 ; SKX: # %bb.0: 3719 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3720 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [10:1.00] 3721 ; SKX-NEXT: retq # sched: [7:1.00] 3722 %vec = load <4 x double>, <4 x double>* %vp 3723 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 3724 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3725 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3726 ret <4 x double> %res 3727 } 3728 3729 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x i64> %mask) { 3730 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2: 3731 ; GENERIC: # %bb.0: 3732 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 3733 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = 
mem[3,1,1,1] sched: [8:1.00] 3734 ; GENERIC-NEXT: retq # sched: [1:1.00] 3735 ; 3736 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2: 3737 ; SKX: # %bb.0: 3738 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3739 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [10:1.00] 3740 ; SKX-NEXT: retq # sched: [7:1.00] 3741 %vec = load <4 x double>, <4 x double>* %vp 3742 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 3743 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3744 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3745 ret <4 x double> %res 3746 } 3747 3748 define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { 3749 ; GENERIC-LABEL: test_4xdouble_perm_mem_mask3: 3750 ; GENERIC: # %bb.0: 3751 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [8:1.00] 3752 ; GENERIC-NEXT: retq # sched: [1:1.00] 3753 ; 3754 ; SKX-LABEL: test_4xdouble_perm_mem_mask3: 3755 ; SKX: # %bb.0: 3756 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [10:1.00] 3757 ; SKX-NEXT: retq # sched: [7:1.00] 3758 %vec = load <4 x double>, <4 x double>* %vp 3759 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 3760 ret <4 x double> %res 3761 } 3762 define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { 3763 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3: 3764 ; GENERIC: # %bb.0: 3765 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 3766 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [8:1.00] 3767 ; GENERIC-NEXT: retq # sched: [1:1.00] 3768 ; 3769 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3: 3770 ; SKX: # %bb.0: 3771 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 3772 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [10:1.00] 3773 ; SKX-NEXT: retq # sched: [7:1.00] 3774 %vec = load <4 x 
double>, <4 x double>* %vp 3775 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 3776 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3777 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 3778 ret <4 x double> %res 3779 } 3780 3781 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x i64> %mask) { 3782 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3: 3783 ; GENERIC: # %bb.0: 3784 ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] 3785 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [8:1.00] 3786 ; GENERIC-NEXT: retq # sched: [1:1.00] 3787 ; 3788 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask3: 3789 ; SKX: # %bb.0: 3790 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] 3791 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [10:1.00] 3792 ; SKX-NEXT: retq # sched: [7:1.00] 3793 %vec = load <4 x double>, <4 x double>* %vp 3794 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 3795 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 3796 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 3797 ret <4 x double> %res 3798 } 3799 3800 define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) { 3801 ; GENERIC-LABEL: test_8xdouble_perm_mask0: 3802 ; GENERIC: # %bb.0: 3803 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [7:0.50] 3804 ; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 3805 ; GENERIC-NEXT: retq # sched: [1:1.00] 3806 ; 3807 ; SKX-LABEL: test_8xdouble_perm_mask0: 3808 ; SKX: # %bb.0: 3809 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [8:0.50] 3810 ; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 3811 ; SKX-NEXT: retq # sched: [7:1.00] 3812 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 3813 
ret <8 x double> %res 3814 } 3815 define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3816 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask0: 3817 ; GENERIC: # %bb.0: 3818 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [7:0.50] 3819 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3820 ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3821 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3822 ; GENERIC-NEXT: retq # sched: [1:1.00] 3823 ; 3824 ; SKX-LABEL: test_masked_8xdouble_perm_mask0: 3825 ; SKX: # %bb.0: 3826 ; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [8:0.50] 3827 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3828 ; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3829 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3830 ; SKX-NEXT: retq # sched: [7:1.00] 3831 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 3832 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3833 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3834 ret <8 x double> %res 3835 } 3836 3837 define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i64> %mask) { 3838 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0: 3839 ; GENERIC: # %bb.0: 3840 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [7:0.50] 3841 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3842 ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3843 ; GENERIC-NEXT: retq # sched: [1:1.00] 3844 ; 3845 ; SKX-LABEL: test_masked_z_8xdouble_perm_mask0: 3846 ; SKX: # %bb.0: 3847 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [8:0.50] 3848 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3849 ; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3850 ; SKX-NEXT: retq # sched: 
[7:1.00] 3851 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4> 3852 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3853 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3854 ret <8 x double> %res 3855 } 3856 define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3857 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1: 3858 ; GENERIC: # %bb.0: 3859 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3860 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] 3861 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3862 ; GENERIC-NEXT: retq # sched: [1:1.00] 3863 ; 3864 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask1: 3865 ; SKX: # %bb.0: 3866 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3867 ; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] 3868 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3869 ; SKX-NEXT: retq # sched: [7:1.00] 3870 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6> 3871 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3872 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3873 ret <8 x double> %res 3874 } 3875 3876 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) { 3877 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1: 3878 ; GENERIC: # %bb.0: 3879 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3880 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] 3881 ; GENERIC-NEXT: retq # sched: [1:1.00] 3882 ; 3883 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask1: 3884 ; SKX: # %bb.0: 3885 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3886 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] 3887 ; SKX-NEXT: retq # sched: [7:1.00] 3888 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6> 3889 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3890 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3891 ret <8 x double> %res 3892 } 3893 define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3894 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask2: 3895 ; GENERIC: # %bb.0: 3896 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [7:0.50] 3897 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3898 ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3899 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3900 ; GENERIC-NEXT: retq # sched: [1:1.00] 3901 ; 3902 ; SKX-LABEL: test_masked_8xdouble_perm_mask2: 3903 ; SKX: # %bb.0: 3904 ; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [8:0.50] 3905 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3906 ; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3907 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3908 ; SKX-NEXT: retq # sched: [7:1.00] 3909 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7> 3910 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3911 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3912 ret <8 x double> %res 3913 } 3914 3915 define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x i64> %mask) { 3916 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2: 3917 ; GENERIC: # %bb.0: 3918 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [7:0.50] 3919 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3920 ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 3921 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 3922 ; 3923 ; SKX-LABEL: test_masked_z_8xdouble_perm_mask2: 3924 ; SKX: # %bb.0: 3925 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [8:0.50] 3926 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3927 ; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 3928 ; SKX-NEXT: retq # sched: [7:1.00] 3929 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7> 3930 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3931 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3932 ret <8 x double> %res 3933 } 3934 define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { 3935 ; GENERIC-LABEL: test_8xdouble_perm_imm_mask3: 3936 ; GENERIC: # %bb.0: 3937 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] 3938 ; GENERIC-NEXT: retq # sched: [1:1.00] 3939 ; 3940 ; SKX-LABEL: test_8xdouble_perm_imm_mask3: 3941 ; SKX: # %bb.0: 3942 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] 3943 ; SKX-NEXT: retq # sched: [7:1.00] 3944 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 3945 ret <8 x double> %res 3946 } 3947 define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3948 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3: 3949 ; GENERIC: # %bb.0: 3950 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3951 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] 3952 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3953 ; GENERIC-NEXT: retq # sched: [1:1.00] 3954 ; 3955 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask3: 3956 ; SKX: # %bb.0: 3957 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3958 ; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: 
[3:1.00] 3959 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3960 ; SKX-NEXT: retq # sched: [7:1.00] 3961 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 3962 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3963 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 3964 ret <8 x double> %res 3965 } 3966 3967 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) { 3968 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3: 3969 ; GENERIC: # %bb.0: 3970 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 3971 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] 3972 ; GENERIC-NEXT: retq # sched: [1:1.00] 3973 ; 3974 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask3: 3975 ; SKX: # %bb.0: 3976 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 3977 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] 3978 ; SKX-NEXT: retq # sched: [7:1.00] 3979 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4> 3980 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 3981 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 3982 ret <8 x double> %res 3983 } 3984 define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 3985 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask4: 3986 ; GENERIC: # %bb.0: 3987 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [7:0.50] 3988 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 3989 ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 3990 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 3991 ; GENERIC-NEXT: retq # sched: [1:1.00] 3992 ; 3993 ; SKX-LABEL: test_masked_8xdouble_perm_mask4: 3994 ; SKX: # %bb.0: 3995 ; SKX-NEXT: vmovapd 
{{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [8:0.50] 3996 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 3997 ; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 3998 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 3999 ; SKX-NEXT: retq # sched: [7:1.00] 4000 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1> 4001 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4002 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4003 ret <8 x double> %res 4004 } 4005 4006 define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i64> %mask) { 4007 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4: 4008 ; GENERIC: # %bb.0: 4009 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [7:0.50] 4010 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4011 ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 4012 ; GENERIC-NEXT: retq # sched: [1:1.00] 4013 ; 4014 ; SKX-LABEL: test_masked_z_8xdouble_perm_mask4: 4015 ; SKX: # %bb.0: 4016 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [8:0.50] 4017 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4018 ; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 4019 ; SKX-NEXT: retq # sched: [7:1.00] 4020 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1> 4021 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4022 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4023 ret <8 x double> %res 4024 } 4025 define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 4026 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5: 4027 ; GENERIC: # %bb.0: 4028 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 4029 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = 
zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] 4030 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 4031 ; GENERIC-NEXT: retq # sched: [1:1.00] 4032 ; 4033 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask5: 4034 ; SKX: # %bb.0: 4035 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 4036 ; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] 4037 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 4038 ; SKX-NEXT: retq # sched: [7:1.00] 4039 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7> 4040 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4041 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4042 ret <8 x double> %res 4043 } 4044 4045 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) { 4046 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5: 4047 ; GENERIC: # %bb.0: 4048 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4049 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] 4050 ; GENERIC-NEXT: retq # sched: [1:1.00] 4051 ; 4052 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask5: 4053 ; SKX: # %bb.0: 4054 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4055 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] 4056 ; SKX-NEXT: retq # sched: [7:1.00] 4057 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7> 4058 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4059 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4060 ret <8 x double> %res 4061 } 4062 define <8 x double> @test_8xdouble_perm_mask6(<8 x double> %vec) { 4063 ; GENERIC-LABEL: test_8xdouble_perm_mask6: 4064 ; GENERIC: # %bb.0: 4065 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [7:0.50] 4066 ; GENERIC-NEXT: vpermpd %zmm0, 
%zmm1, %zmm0 # sched: [1:1.00] 4067 ; GENERIC-NEXT: retq # sched: [1:1.00] 4068 ; 4069 ; SKX-LABEL: test_8xdouble_perm_mask6: 4070 ; SKX: # %bb.0: 4071 ; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [8:0.50] 4072 ; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 4073 ; SKX-NEXT: retq # sched: [7:1.00] 4074 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 4075 ret <8 x double> %res 4076 } 4077 define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 4078 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask6: 4079 ; GENERIC: # %bb.0: 4080 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [7:0.50] 4081 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 4082 ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] 4083 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 4084 ; GENERIC-NEXT: retq # sched: [1:1.00] 4085 ; 4086 ; SKX-LABEL: test_masked_8xdouble_perm_mask6: 4087 ; SKX: # %bb.0: 4088 ; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [8:0.50] 4089 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 4090 ; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] 4091 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 4092 ; SKX-NEXT: retq # sched: [7:1.00] 4093 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 4094 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4095 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4096 ret <8 x double> %res 4097 } 4098 4099 define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i64> %mask) { 4100 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6: 4101 ; GENERIC: # %bb.0: 4102 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [7:0.50] 4103 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # 
sched: [1:0.33] 4104 ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] 4105 ; GENERIC-NEXT: retq # sched: [1:1.00] 4106 ; 4107 ; SKX-LABEL: test_masked_z_8xdouble_perm_mask6: 4108 ; SKX: # %bb.0: 4109 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [8:0.50] 4110 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4111 ; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] 4112 ; SKX-NEXT: retq # sched: [7:1.00] 4113 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2> 4114 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4115 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4116 ret <8 x double> %res 4117 } 4118 define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { 4119 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7: 4120 ; GENERIC: # %bb.0: 4121 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 4122 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] 4123 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 4124 ; GENERIC-NEXT: retq # sched: [1:1.00] 4125 ; 4126 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask7: 4127 ; SKX: # %bb.0: 4128 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 4129 ; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] 4130 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 4131 ; SKX-NEXT: retq # sched: [7:1.00] 4132 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6> 4133 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4134 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4135 ret <8 x double> %res 4136 } 4137 4138 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) { 4139 ; GENERIC-LABEL: 
test_masked_z_8xdouble_perm_imm_mask7: 4140 ; GENERIC: # %bb.0: 4141 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4142 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] 4143 ; GENERIC-NEXT: retq # sched: [1:1.00] 4144 ; 4145 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask7: 4146 ; SKX: # %bb.0: 4147 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4148 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] 4149 ; SKX-NEXT: retq # sched: [7:1.00] 4150 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6> 4151 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4152 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4153 ret <8 x double> %res 4154 } 4155 define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { 4156 ; GENERIC-LABEL: test_8xdouble_perm_mem_mask0: 4157 ; GENERIC: # %bb.0: 4158 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [7:0.50] 4159 ; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 4160 ; GENERIC-NEXT: retq # sched: [1:1.00] 4161 ; 4162 ; SKX-LABEL: test_8xdouble_perm_mem_mask0: 4163 ; SKX: # %bb.0: 4164 ; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [8:0.50] 4165 ; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 4166 ; SKX-NEXT: retq # sched: [7:1.00] 4167 %vec = load <8 x double>, <8 x double>* %vp 4168 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 4169 ret <8 x double> %res 4170 } 4171 define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4172 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0: 4173 ; GENERIC: # %bb.0: 4174 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [7:0.50] 4175 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # 
sched: [1:0.33] 4176 ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4177 ; GENERIC-NEXT: retq # sched: [1:1.00] 4178 ; 4179 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0: 4180 ; SKX: # %bb.0: 4181 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [8:0.50] 4182 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4183 ; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4184 ; SKX-NEXT: retq # sched: [7:1.00] 4185 %vec = load <8 x double>, <8 x double>* %vp 4186 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 4187 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4188 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4189 ret <8 x double> %res 4190 } 4191 4192 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x i64> %mask) { 4193 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0: 4194 ; GENERIC: # %bb.0: 4195 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [7:0.50] 4196 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4197 ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4198 ; GENERIC-NEXT: retq # sched: [1:1.00] 4199 ; 4200 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0: 4201 ; SKX: # %bb.0: 4202 ; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [8:0.50] 4203 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4204 ; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4205 ; SKX-NEXT: retq # sched: [7:1.00] 4206 %vec = load <8 x double>, <8 x double>* %vp 4207 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1> 4208 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4209 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4210 ret <8 x double> %res 4211 } 4212 4213 define <8 x double> 
@test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4214 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: 4215 ; GENERIC: # %bb.0: 4216 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4217 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] 4218 ; GENERIC-NEXT: retq # sched: [1:1.00] 4219 ; 4220 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: 4221 ; SKX: # %bb.0: 4222 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4223 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] 4224 ; SKX-NEXT: retq # sched: [7:1.00] 4225 %vec = load <8 x double>, <8 x double>* %vp 4226 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7> 4227 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4228 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4229 ret <8 x double> %res 4230 } 4231 4232 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) { 4233 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: 4234 ; GENERIC: # %bb.0: 4235 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4236 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] 4237 ; GENERIC-NEXT: retq # sched: [1:1.00] 4238 ; 4239 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: 4240 ; SKX: # %bb.0: 4241 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4242 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] 4243 ; SKX-NEXT: retq # sched: [7:1.00] 4244 %vec = load <8 x double>, <8 x double>* %vp 4245 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7> 4246 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4247 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> 
zeroinitializer 4248 ret <8 x double> %res 4249 } 4250 4251 define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4252 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2: 4253 ; GENERIC: # %bb.0: 4254 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [7:0.50] 4255 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4256 ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4257 ; GENERIC-NEXT: retq # sched: [1:1.00] 4258 ; 4259 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2: 4260 ; SKX: # %bb.0: 4261 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [8:0.50] 4262 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4263 ; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4264 ; SKX-NEXT: retq # sched: [7:1.00] 4265 %vec = load <8 x double>, <8 x double>* %vp 4266 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5> 4267 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4268 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4269 ret <8 x double> %res 4270 } 4271 4272 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x i64> %mask) { 4273 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2: 4274 ; GENERIC: # %bb.0: 4275 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [7:0.50] 4276 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4277 ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4278 ; GENERIC-NEXT: retq # sched: [1:1.00] 4279 ; 4280 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2: 4281 ; SKX: # %bb.0: 4282 ; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [8:0.50] 4283 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4284 ; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4285 ; SKX-NEXT: retq # sched: [7:1.00] 
4286 %vec = load <8 x double>, <8 x double>* %vp 4287 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5> 4288 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4289 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4290 ret <8 x double> %res 4291 } 4292 4293 define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { 4294 ; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3: 4295 ; GENERIC: # %bb.0: 4296 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] 4297 ; GENERIC-NEXT: retq # sched: [1:1.00] 4298 ; 4299 ; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3: 4300 ; SKX: # %bb.0: 4301 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] 4302 ; SKX-NEXT: retq # sched: [7:1.00] 4303 %vec = load <8 x double>, <8 x double>* %vp 4304 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 4305 ret <8 x double> %res 4306 } 4307 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4308 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: 4309 ; GENERIC: # %bb.0: 4310 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4311 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] 4312 ; GENERIC-NEXT: retq # sched: [1:1.00] 4313 ; 4314 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: 4315 ; SKX: # %bb.0: 4316 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4317 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] 4318 ; SKX-NEXT: retq # sched: [7:1.00] 4319 %vec = load <8 x double>, <8 x double>* %vp 4320 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 4321 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4322 %res = select 
<8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4323 ret <8 x double> %res 4324 } 4325 4326 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) { 4327 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: 4328 ; GENERIC: # %bb.0: 4329 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4330 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] 4331 ; GENERIC-NEXT: retq # sched: [1:1.00] 4332 ; 4333 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: 4334 ; SKX: # %bb.0: 4335 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4336 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] 4337 ; SKX-NEXT: retq # sched: [7:1.00] 4338 %vec = load <8 x double>, <8 x double>* %vp 4339 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4> 4340 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4341 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4342 ret <8 x double> %res 4343 } 4344 4345 define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4346 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4: 4347 ; GENERIC: # %bb.0: 4348 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [7:0.50] 4349 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4350 ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4351 ; GENERIC-NEXT: retq # sched: [1:1.00] 4352 ; 4353 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4: 4354 ; SKX: # %bb.0: 4355 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [8:0.50] 4356 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4357 ; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4358 ; SKX-NEXT: retq # sched: [7:1.00] 4359 %vec = load <8 x double>, <8 x double>* %vp 4360 %shuf = 
shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0> 4361 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4362 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4363 ret <8 x double> %res 4364 } 4365 4366 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x i64> %mask) { 4367 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4: 4368 ; GENERIC: # %bb.0: 4369 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [7:0.50] 4370 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4371 ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4372 ; GENERIC-NEXT: retq # sched: [1:1.00] 4373 ; 4374 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4: 4375 ; SKX: # %bb.0: 4376 ; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [8:0.50] 4377 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4378 ; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4379 ; SKX-NEXT: retq # sched: [7:1.00] 4380 %vec = load <8 x double>, <8 x double>* %vp 4381 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0> 4382 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4383 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4384 ret <8 x double> %res 4385 } 4386 4387 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4388 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: 4389 ; GENERIC: # %bb.0: 4390 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4391 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] 4392 ; GENERIC-NEXT: retq # sched: [1:1.00] 4393 ; 4394 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: 4395 ; SKX: # %bb.0: 4396 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 
4397 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] 4398 ; SKX-NEXT: retq # sched: [7:1.00] 4399 %vec = load <8 x double>, <8 x double>* %vp 4400 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7> 4401 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4402 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4403 ret <8 x double> %res 4404 } 4405 4406 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) { 4407 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: 4408 ; GENERIC: # %bb.0: 4409 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4410 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] 4411 ; GENERIC-NEXT: retq # sched: [1:1.00] 4412 ; 4413 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: 4414 ; SKX: # %bb.0: 4415 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4416 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] 4417 ; SKX-NEXT: retq # sched: [7:1.00] 4418 %vec = load <8 x double>, <8 x double>* %vp 4419 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7> 4420 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4421 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4422 ret <8 x double> %res 4423 } 4424 4425 define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) { 4426 ; GENERIC-LABEL: test_8xdouble_perm_mem_mask6: 4427 ; GENERIC: # %bb.0: 4428 ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [7:0.50] 4429 ; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 4430 ; GENERIC-NEXT: retq # sched: [1:1.00] 4431 ; 4432 ; SKX-LABEL: test_8xdouble_perm_mem_mask6: 4433 ; SKX: # %bb.0: 4434 ; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: 
[8:0.50] 4435 ; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 4436 ; SKX-NEXT: retq # sched: [7:1.00] 4437 %vec = load <8 x double>, <8 x double>* %vp 4438 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 4439 ret <8 x double> %res 4440 } 4441 define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4442 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6: 4443 ; GENERIC: # %bb.0: 4444 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [7:0.50] 4445 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4446 ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] 4447 ; GENERIC-NEXT: retq # sched: [1:1.00] 4448 ; 4449 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6: 4450 ; SKX: # %bb.0: 4451 ; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [8:0.50] 4452 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4453 ; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] 4454 ; SKX-NEXT: retq # sched: [7:1.00] 4455 %vec = load <8 x double>, <8 x double>* %vp 4456 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 4457 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4458 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4459 ret <8 x double> %res 4460 } 4461 4462 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x i64> %mask) { 4463 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6: 4464 ; GENERIC: # %bb.0: 4465 ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [7:0.50] 4466 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4467 ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] 4468 ; GENERIC-NEXT: retq # sched: [1:1.00] 4469 ; 4470 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6: 
4471 ; SKX: # %bb.0: 4472 ; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [8:0.50] 4473 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4474 ; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] 4475 ; SKX-NEXT: retq # sched: [7:1.00] 4476 %vec = load <8 x double>, <8 x double>* %vp 4477 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5> 4478 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4479 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4480 ret <8 x double> %res 4481 } 4482 4483 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { 4484 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: 4485 ; GENERIC: # %bb.0: 4486 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 4487 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 4488 ; GENERIC-NEXT: retq # sched: [1:1.00] 4489 ; 4490 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: 4491 ; SKX: # %bb.0: 4492 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 4493 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] 4494 ; SKX-NEXT: retq # sched: [7:1.00] 4495 %vec = load <8 x double>, <8 x double>* %vp 4496 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 4497 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4498 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 4499 ret <8 x double> %res 4500 } 4501 4502 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) { 4503 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: 4504 ; GENERIC: # %bb.0: 4505 ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 4506 ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = 
mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 4507 ; GENERIC-NEXT: retq # sched: [1:1.00] 4508 ; 4509 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: 4510 ; SKX: # %bb.0: 4511 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 4512 ; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] 4513 ; SKX-NEXT: retq # sched: [7:1.00] 4514 %vec = load <8 x double>, <8 x double>* %vp 4515 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 4516 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 4517 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 4518 ret <8 x double> %res 4519 } 4520 4521 define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) { 4522 ; GENERIC-LABEL: test_16xi8_perm_mask0: 4523 ; GENERIC: # %bb.0: 4524 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 4525 ; GENERIC-NEXT: retq # sched: [1:1.00] 4526 ; 4527 ; SKX-LABEL: test_16xi8_perm_mask0: 4528 ; SKX: # %bb.0: 4529 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] 4530 ; SKX-NEXT: retq # sched: [7:1.00] 4531 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 4532 ret <16 x i8> %res 4533 } 4534 define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4535 ; GENERIC-LABEL: test_masked_16xi8_perm_mask0: 4536 ; GENERIC: # %bb.0: 4537 ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4538 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 4539 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4540 ; GENERIC-NEXT: retq # sched: [1:1.00] 4541 ; 4542 ; SKX-LABEL: test_masked_16xi8_perm_mask0: 4543 ; SKX: # %bb.0: 4544 ; SKX-NEXT: 
vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4545 ; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] 4546 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4547 ; SKX-NEXT: retq # sched: [7:1.00] 4548 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 4549 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4550 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4551 ret <16 x i8> %res 4552 } 4553 4554 define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) { 4555 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0: 4556 ; GENERIC: # %bb.0: 4557 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4558 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] 4559 ; GENERIC-NEXT: retq # sched: [1:1.00] 4560 ; 4561 ; SKX-LABEL: test_masked_z_16xi8_perm_mask0: 4562 ; SKX: # %bb.0: 4563 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4564 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] 4565 ; SKX-NEXT: retq # sched: [7:1.00] 4566 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14> 4567 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4568 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4569 ret <16 x i8> %res 4570 } 4571 define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4572 ; GENERIC-LABEL: test_masked_16xi8_perm_mask1: 4573 ; GENERIC: # %bb.0: 4574 ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4575 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 
4576 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4577 ; GENERIC-NEXT: retq # sched: [1:1.00] 4578 ; 4579 ; SKX-LABEL: test_masked_16xi8_perm_mask1: 4580 ; SKX: # %bb.0: 4581 ; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4582 ; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] 4583 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4584 ; SKX-NEXT: retq # sched: [7:1.00] 4585 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 4586 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4587 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4588 ret <16 x i8> %res 4589 } 4590 4591 define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) { 4592 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1: 4593 ; GENERIC: # %bb.0: 4594 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4595 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] 4596 ; GENERIC-NEXT: retq # sched: [1:1.00] 4597 ; 4598 ; SKX-LABEL: test_masked_z_16xi8_perm_mask1: 4599 ; SKX: # %bb.0: 4600 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4601 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] 4602 ; SKX-NEXT: retq # sched: [7:1.00] 4603 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0> 4604 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4605 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4606 ret <16 x i8> %res 4607 } 4608 define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4609 ; GENERIC-LABEL: test_masked_16xi8_perm_mask2: 4610 ; 
GENERIC: # %bb.0: 4611 ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4612 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 4613 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4614 ; GENERIC-NEXT: retq # sched: [1:1.00] 4615 ; 4616 ; SKX-LABEL: test_masked_16xi8_perm_mask2: 4617 ; SKX: # %bb.0: 4618 ; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4619 ; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] 4620 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4621 ; SKX-NEXT: retq # sched: [7:1.00] 4622 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 4623 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4624 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4625 ret <16 x i8> %res 4626 } 4627 4628 define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) { 4629 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2: 4630 ; GENERIC: # %bb.0: 4631 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4632 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] 4633 ; GENERIC-NEXT: retq # sched: [1:1.00] 4634 ; 4635 ; SKX-LABEL: test_masked_z_16xi8_perm_mask2: 4636 ; SKX: # %bb.0: 4637 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4638 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] 4639 ; SKX-NEXT: retq # sched: [7:1.00] 4640 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7> 4641 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4642 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> 
zeroinitializer 4643 ret <16 x i8> %res 4644 } 4645 define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { 4646 ; GENERIC-LABEL: test_16xi8_perm_mask3: 4647 ; GENERIC: # %bb.0: 4648 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 4649 ; GENERIC-NEXT: retq # sched: [1:1.00] 4650 ; 4651 ; SKX-LABEL: test_16xi8_perm_mask3: 4652 ; SKX: # %bb.0: 4653 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] 4654 ; SKX-NEXT: retq # sched: [7:1.00] 4655 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 4656 ret <16 x i8> %res 4657 } 4658 define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { 4659 ; GENERIC-LABEL: test_masked_16xi8_perm_mask3: 4660 ; GENERIC: # %bb.0: 4661 ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] 4662 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 4663 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4664 ; GENERIC-NEXT: retq # sched: [1:1.00] 4665 ; 4666 ; SKX-LABEL: test_masked_16xi8_perm_mask3: 4667 ; SKX: # %bb.0: 4668 ; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] 4669 ; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] 4670 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 4671 ; SKX-NEXT: retq # sched: [7:1.00] 4672 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 4673 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4674 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4675 ret <16 x i8> %res 4676 } 4677 4678 define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) { 4679 ; 
GENERIC-LABEL: test_masked_z_16xi8_perm_mask3: 4680 ; GENERIC: # %bb.0: 4681 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4682 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] 4683 ; GENERIC-NEXT: retq # sched: [1:1.00] 4684 ; 4685 ; SKX-LABEL: test_masked_z_16xi8_perm_mask3: 4686 ; SKX: # %bb.0: 4687 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4688 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] 4689 ; SKX-NEXT: retq # sched: [7:1.00] 4690 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6> 4691 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4692 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4693 ret <16 x i8> %res 4694 } 4695 define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) { 4696 ; GENERIC-LABEL: test_16xi8_perm_mem_mask0: 4697 ; GENERIC: # %bb.0: 4698 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4699 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 4700 ; GENERIC-NEXT: retq # sched: [1:1.00] 4701 ; 4702 ; SKX-LABEL: test_16xi8_perm_mem_mask0: 4703 ; SKX: # %bb.0: 4704 ; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4705 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] 4706 ; SKX-NEXT: retq # sched: [7:1.00] 4707 %vec = load <16 x i8>, <16 x i8>* %vp 4708 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 4709 ret <16 x i8> %res 4710 } 4711 define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4712 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask0: 
4713 ; GENERIC: # %bb.0: 4714 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4715 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4716 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 4717 ; GENERIC-NEXT: retq # sched: [1:1.00] 4718 ; 4719 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask0: 4720 ; SKX: # %bb.0: 4721 ; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4722 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4723 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] 4724 ; SKX-NEXT: retq # sched: [7:1.00] 4725 %vec = load <16 x i8>, <16 x i8>* %vp 4726 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 4727 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4728 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4729 ret <16 x i8> %res 4730 } 4731 4732 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %mask) { 4733 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask0: 4734 ; GENERIC: # %bb.0: 4735 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4736 ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4737 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] 4738 ; GENERIC-NEXT: retq # sched: [1:1.00] 4739 ; 4740 ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0: 4741 ; SKX: # %bb.0: 4742 ; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4743 ; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4744 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] 4745 ; SKX-NEXT: retq # sched: [7:1.00] 4746 %vec = load <16 x i8>, <16 x i8>* %vp 4747 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 
9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13> 4748 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4749 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4750 ret <16 x i8> %res 4751 } 4752 4753 define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4754 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask1: 4755 ; GENERIC: # %bb.0: 4756 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4757 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4758 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 4759 ; GENERIC-NEXT: retq # sched: [1:1.00] 4760 ; 4761 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask1: 4762 ; SKX: # %bb.0: 4763 ; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4764 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4765 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] 4766 ; SKX-NEXT: retq # sched: [7:1.00] 4767 %vec = load <16 x i8>, <16 x i8>* %vp 4768 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 4769 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4770 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4771 ret <16 x i8> %res 4772 } 4773 4774 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %mask) { 4775 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask1: 4776 ; GENERIC: # %bb.0: 4777 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4778 ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4779 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] 4780 ; GENERIC-NEXT: retq # sched: [1:1.00] 4781 ; 4782 ; SKX-LABEL: 
test_masked_z_16xi8_perm_mem_mask1: 4783 ; SKX: # %bb.0: 4784 ; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4785 ; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4786 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] 4787 ; SKX-NEXT: retq # sched: [7:1.00] 4788 %vec = load <16 x i8>, <16 x i8>* %vp 4789 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11> 4790 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4791 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4792 ret <16 x i8> %res 4793 } 4794 4795 define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4796 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask2: 4797 ; GENERIC: # %bb.0: 4798 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4799 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4800 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 4801 ; GENERIC-NEXT: retq # sched: [1:1.00] 4802 ; 4803 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask2: 4804 ; SKX: # %bb.0: 4805 ; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4806 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4807 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] 4808 ; SKX-NEXT: retq # sched: [7:1.00] 4809 %vec = load <16 x i8>, <16 x i8>* %vp 4810 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 4811 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4812 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4813 ret <16 x i8> %res 4814 } 4815 4816 define <16 x i8> 
@test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %mask) { 4817 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask2: 4818 ; GENERIC: # %bb.0: 4819 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4820 ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4821 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] 4822 ; GENERIC-NEXT: retq # sched: [1:1.00] 4823 ; 4824 ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2: 4825 ; SKX: # %bb.0: 4826 ; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4827 ; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4828 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] 4829 ; SKX-NEXT: retq # sched: [7:1.00] 4830 %vec = load <16 x i8>, <16 x i8>* %vp 4831 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9> 4832 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4833 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4834 ret <16 x i8> %res 4835 } 4836 4837 define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) { 4838 ; GENERIC-LABEL: test_16xi8_perm_mem_mask3: 4839 ; GENERIC: # %bb.0: 4840 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4841 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 4842 ; GENERIC-NEXT: retq # sched: [1:1.00] 4843 ; 4844 ; SKX-LABEL: test_16xi8_perm_mem_mask3: 4845 ; SKX: # %bb.0: 4846 ; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4847 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] 4848 ; SKX-NEXT: retq # sched: [7:1.00] 4849 %vec = load <16 x i8>, <16 x i8>* %vp 4850 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, 
i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 4851 ret <16 x i8> %res 4852 } 4853 define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { 4854 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask3: 4855 ; GENERIC: # %bb.0: 4856 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4857 ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] 4858 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 4859 ; GENERIC-NEXT: retq # sched: [1:1.00] 4860 ; 4861 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask3: 4862 ; SKX: # %bb.0: 4863 ; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] 4864 ; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] 4865 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] 4866 ; SKX-NEXT: retq # sched: [7:1.00] 4867 %vec = load <16 x i8>, <16 x i8>* %vp 4868 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 4869 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4870 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 4871 ret <16 x i8> %res 4872 } 4873 4874 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %mask) { 4875 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask3: 4876 ; GENERIC: # %bb.0: 4877 ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4878 ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] 4879 ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] 4880 ; GENERIC-NEXT: retq # sched: [1:1.00] 4881 ; 4882 ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3: 4883 ; SKX: # %bb.0: 4884 ; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] 4885 ; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] 4886 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = 
xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] 4887 ; SKX-NEXT: retq # sched: [7:1.00] 4888 %vec = load <16 x i8>, <16 x i8>* %vp 4889 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4> 4890 %cmp = icmp eq <16 x i8> %mask, zeroinitializer 4891 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer 4892 ret <16 x i8> %res 4893 } 4894 4895 define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { 4896 ; GENERIC-LABEL: test_32xi8_perm_mask0: 4897 ; GENERIC: # %bb.0: 4898 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] 4899 ; GENERIC-NEXT: retq # sched: [1:1.00] 4900 ; 4901 ; SKX-LABEL: test_32xi8_perm_mask0: 4902 ; SKX: # %bb.0: 4903 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] 4904 ; SKX-NEXT: retq # sched: [7:1.00] 4905 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 4906 ret <32 x i8> %res 4907 } 4908 define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 4909 ; GENERIC-LABEL: test_masked_32xi8_perm_mask0: 4910 ; GENERIC: # %bb.0: 4911 ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 4912 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] 4913 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 4914 ; GENERIC-NEXT: retq # sched: [1:1.00] 4915 ; 4916 ; SKX-LABEL: 
test_masked_32xi8_perm_mask0: 4917 ; SKX: # %bb.0: 4918 ; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 4919 ; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] 4920 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 4921 ; SKX-NEXT: retq # sched: [7:1.00] 4922 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 4923 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4924 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 4925 ret <32 x i8> %res 4926 } 4927 4928 define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) { 4929 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0: 4930 ; GENERIC: # %bb.0: 4931 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 4932 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] 4933 ; GENERIC-NEXT: retq # sched: [1:1.00] 4934 ; 4935 ; SKX-LABEL: test_masked_z_32xi8_perm_mask0: 4936 ; SKX: # %bb.0: 4937 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 4938 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] 4939 ; SKX-NEXT: retq # sched: [7:1.00] 4940 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21> 4941 %cmp = icmp eq <32 x i8> %mask, 
zeroinitializer 4942 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 4943 ret <32 x i8> %res 4944 } 4945 define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 4946 ; GENERIC-LABEL: test_masked_32xi8_perm_mask1: 4947 ; GENERIC: # %bb.0: 4948 ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 4949 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50] 4950 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 4951 ; GENERIC-NEXT: retq # sched: [1:1.00] 4952 ; 4953 ; SKX-LABEL: test_masked_32xi8_perm_mask1: 4954 ; SKX: # %bb.0: 4955 ; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 4956 ; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] 4957 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 4958 ; SKX-NEXT: retq # sched: [7:1.00] 4959 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 4960 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4961 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 4962 ret <32 x i8> %res 4963 } 4964 4965 define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) { 4966 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1: 4967 ; GENERIC: # %bb.0: 4968 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 4969 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50] 4970 ; GENERIC-NEXT: retq # sched: [1:1.00] 4971 ; 4972 ; SKX-LABEL: test_masked_z_32xi8_perm_mask1: 
4973 ; SKX: # %bb.0: 4974 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 4975 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] 4976 ; SKX-NEXT: retq # sched: [7:1.00] 4977 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24> 4978 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 4979 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 4980 ret <32 x i8> %res 4981 } 4982 define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 4983 ; GENERIC-LABEL: test_masked_32xi8_perm_mask2: 4984 ; GENERIC: # %bb.0: 4985 ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 4986 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50] 4987 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 4988 ; GENERIC-NEXT: retq # sched: [1:1.00] 4989 ; 4990 ; SKX-LABEL: test_masked_32xi8_perm_mask2: 4991 ; SKX: # %bb.0: 4992 ; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 4993 ; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] 4994 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 4995 ; SKX-NEXT: retq # sched: [7:1.00] 4996 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 4997 %cmp 
= icmp eq <32 x i8> %mask, zeroinitializer 4998 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 4999 ret <32 x i8> %res 5000 } 5001 5002 define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) { 5003 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2: 5004 ; GENERIC: # %bb.0: 5005 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5006 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50] 5007 ; GENERIC-NEXT: retq # sched: [1:1.00] 5008 ; 5009 ; SKX-LABEL: test_masked_z_32xi8_perm_mask2: 5010 ; SKX: # %bb.0: 5011 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5012 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] 5013 ; SKX-NEXT: retq # sched: [7:1.00] 5014 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29> 5015 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5016 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5017 ret <32 x i8> %res 5018 } 5019 define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { 5020 ; GENERIC-LABEL: test_32xi8_perm_mask3: 5021 ; GENERIC: # %bb.0: 5022 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] 5023 ; GENERIC-NEXT: retq # sched: [1:1.00] 5024 ; 5025 ; SKX-LABEL: test_32xi8_perm_mask3: 5026 ; SKX: # %bb.0: 5027 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] 5028 ; SKX-NEXT: retq # sched: 
[7:1.00] 5029 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 5030 ret <32 x i8> %res 5031 } 5032 define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { 5033 ; GENERIC-LABEL: test_masked_32xi8_perm_mask3: 5034 ; GENERIC: # %bb.0: 5035 ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] 5036 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] 5037 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 5038 ; GENERIC-NEXT: retq # sched: [1:1.00] 5039 ; 5040 ; SKX-LABEL: test_masked_32xi8_perm_mask3: 5041 ; SKX: # %bb.0: 5042 ; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] 5043 ; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] 5044 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 5045 ; SKX-NEXT: retq # sched: [7:1.00] 5046 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 5047 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5048 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5049 ret <32 x i8> %res 5050 } 5051 5052 define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) { 5053 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3: 5054 ; GENERIC: # %bb.0: 5055 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5056 ; GENERIC-NEXT: vpshufb 
{{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] 5057 ; GENERIC-NEXT: retq # sched: [1:1.00] 5058 ; 5059 ; SKX-LABEL: test_masked_z_32xi8_perm_mask3: 5060 ; SKX: # %bb.0: 5061 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5062 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] 5063 ; SKX-NEXT: retq # sched: [7:1.00] 5064 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18> 5065 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5066 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5067 ret <32 x i8> %res 5068 } 5069 define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) { 5070 ; GENERIC-LABEL: test_32xi8_perm_mem_mask0: 5071 ; GENERIC: # %bb.0: 5072 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5073 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] 5074 ; GENERIC-NEXT: retq # sched: [1:1.00] 5075 ; 5076 ; SKX-LABEL: test_32xi8_perm_mem_mask0: 5077 ; SKX: # %bb.0: 5078 ; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5079 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] 5080 ; SKX-NEXT: retq # sched: [7:1.00] 5081 %vec = load <32 x i8>, <32 x i8>* %vp 5082 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 
27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 5083 ret <32 x i8> %res 5084 } 5085 define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5086 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask0: 5087 ; GENERIC: # %bb.0: 5088 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5089 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5090 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] 5091 ; GENERIC-NEXT: retq # sched: [1:1.00] 5092 ; 5093 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask0: 5094 ; SKX: # %bb.0: 5095 ; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5096 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5097 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] 5098 ; SKX-NEXT: retq # sched: [7:1.00] 5099 %vec = load <32 x i8>, <32 x i8>* %vp 5100 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 5101 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5102 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5103 ret <32 x i8> %res 5104 } 5105 5106 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %mask) { 5107 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask0: 5108 ; GENERIC: # %bb.0: 5109 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5110 ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5111 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] 5112 ; GENERIC-NEXT: retq # 
sched: [1:1.00] 5113 ; 5114 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0: 5115 ; SKX: # %bb.0: 5116 ; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5117 ; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5118 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] 5119 ; SKX-NEXT: retq # sched: [7:1.00] 5120 %vec = load <32 x i8>, <32 x i8>* %vp 5121 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22> 5122 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5123 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5124 ret <32 x i8> %res 5125 } 5126 5127 define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5128 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask1: 5129 ; GENERIC: # %bb.0: 5130 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5131 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5132 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50] 5133 ; GENERIC-NEXT: retq # sched: [1:1.00] 5134 ; 5135 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask1: 5136 ; SKX: # %bb.0: 5137 ; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5138 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5139 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] 5140 ; SKX-NEXT: retq # sched: [7:1.00] 5141 %vec = load <32 x i8>, <32 x i8>* %vp 5142 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 
11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19> 5143 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5144 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5145 ret <32 x i8> %res 5146 } 5147 5148 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %mask) { 5149 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask1: 5150 ; GENERIC: # %bb.0: 5151 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5152 ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5153 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50] 5154 ; GENERIC-NEXT: retq # sched: [1:1.00] 5155 ; 5156 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask1: 5157 ; SKX: # %bb.0: 5158 ; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5159 ; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5160 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] 5161 ; SKX-NEXT: retq # sched: [7:1.00] 5162 %vec = load <32 x i8>, <32 x i8>* %vp 5163 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19> 5164 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5165 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5166 ret <32 x i8> %res 5167 } 5168 5169 define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5170 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask2: 5171 ; GENERIC: # %bb.0: 5172 
; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5173 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5174 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50] 5175 ; GENERIC-NEXT: retq # sched: [1:1.00] 5176 ; 5177 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask2: 5178 ; SKX: # %bb.0: 5179 ; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5180 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5181 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] 5182 ; SKX-NEXT: retq # sched: [7:1.00] 5183 %vec = load <32 x i8>, <32 x i8>* %vp 5184 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 5185 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5186 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5187 ret <32 x i8> %res 5188 } 5189 5190 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %mask) { 5191 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask2: 5192 ; GENERIC: # %bb.0: 5193 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5194 ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5195 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50] 5196 ; GENERIC-NEXT: retq # sched: [1:1.00] 5197 ; 5198 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2: 5199 ; SKX: # %bb.0: 5200 ; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5201 ; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5202 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = 
ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] 5203 ; SKX-NEXT: retq # sched: [7:1.00] 5204 %vec = load <32 x i8>, <32 x i8>* %vp 5205 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28> 5206 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5207 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5208 ret <32 x i8> %res 5209 } 5210 5211 define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) { 5212 ; GENERIC-LABEL: test_32xi8_perm_mem_mask3: 5213 ; GENERIC: # %bb.0: 5214 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5215 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] 5216 ; GENERIC-NEXT: retq # sched: [1:1.00] 5217 ; 5218 ; SKX-LABEL: test_32xi8_perm_mem_mask3: 5219 ; SKX: # %bb.0: 5220 ; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] 5221 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] 5222 ; SKX-NEXT: retq # sched: [7:1.00] 5223 %vec = load <32 x i8>, <32 x i8>* %vp 5224 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 5225 ret <32 x i8> %res 5226 } 5227 define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { 5228 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask3: 5229 ; GENERIC: # %bb.0: 5230 ; GENERIC-NEXT: vmovdqa 
(%rdi), %ymm2 # sched: [7:0.50] 5231 ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] 5232 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] 5233 ; GENERIC-NEXT: retq # sched: [1:1.00] 5234 ; 5235 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask3: 5236 ; SKX: # %bb.0: 5237 ; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] 5238 ; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] 5239 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] 5240 ; SKX-NEXT: retq # sched: [7:1.00] 5241 %vec = load <32 x i8>, <32 x i8>* %vp 5242 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 5243 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5244 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 5245 ret <32 x i8> %res 5246 } 5247 5248 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %mask) { 5249 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask3: 5250 ; GENERIC: # %bb.0: 5251 ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5252 ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] 5253 ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] 5254 ; GENERIC-NEXT: retq # sched: [1:1.00] 5255 ; 5256 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3: 5257 ; SKX: # %bb.0: 5258 ; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] 5259 ; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] 5260 ; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = 
ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] 5261 ; SKX-NEXT: retq # sched: [7:1.00] 5262 %vec = load <32 x i8>, <32 x i8>* %vp 5263 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29> 5264 %cmp = icmp eq <32 x i8> %mask, zeroinitializer 5265 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer 5266 ret <32 x i8> %res 5267 } 5268 5269 define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { 5270 ; GENERIC-LABEL: test_64xi8_perm_mask0: 5271 ; GENERIC: # %bb.0: 5272 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] 5273 ; GENERIC-NEXT: retq # sched: [1:1.00] 5274 ; 5275 ; SKX-LABEL: test_64xi8_perm_mask0: 5276 ; SKX: # %bb.0: 5277 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] 5278 ; SKX-NEXT: retq # sched: [7:1.00] 5279 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, 
i32 49, i32 48, i32 62> 5280 ret <64 x i8> %res 5281 } 5282 define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5283 ; GENERIC-LABEL: test_masked_64xi8_perm_mask0: 5284 ; GENERIC: # %bb.0: 5285 ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5286 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] 5287 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5288 ; GENERIC-NEXT: retq # sched: [1:1.00] 5289 ; 5290 ; SKX-LABEL: test_masked_64xi8_perm_mask0: 5291 ; SKX: # %bb.0: 5292 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5293 ; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] 5294 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5295 ; SKX-NEXT: retq # sched: [7:1.00] 5296 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 5297 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5298 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5299 ret <64 x i8> %res 5300 } 5301 5302 define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { 5303 ; GENERIC-LABEL: 
test_masked_z_64xi8_perm_mask0: 5304 ; GENERIC: # %bb.0: 5305 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5306 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] 5307 ; GENERIC-NEXT: retq # sched: [1:1.00] 5308 ; 5309 ; SKX-LABEL: test_masked_z_64xi8_perm_mask0: 5310 ; SKX: # %bb.0: 5311 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5312 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] 5313 ; SKX-NEXT: retq # sched: [7:1.00] 5314 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62> 5315 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5316 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5317 ret <64 x i8> %res 5318 } 5319 define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5320 ; GENERIC-LABEL: test_masked_64xi8_perm_mask1: 5321 ; GENERIC: # %bb.0: 5322 ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5323 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = 
zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] 5324 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5325 ; GENERIC-NEXT: retq # sched: [1:1.00] 5326 ; 5327 ; SKX-LABEL: test_masked_64xi8_perm_mask1: 5328 ; SKX: # %bb.0: 5329 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5330 ; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] 5331 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5332 ; SKX-NEXT: retq # sched: [7:1.00] 5333 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 5334 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5335 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5336 ret <64 x i8> %res 5337 } 5338 5339 define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { 5340 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: 5341 ; GENERIC: # %bb.0: 5342 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5343 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] 5344 ; GENERIC-NEXT: retq # sched: [1:1.00] 5345 ; 5346 ; SKX-LABEL: test_masked_z_64xi8_perm_mask1: 5347 ; SKX: # %bb.0: 5348 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5349 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] 5350 ; SKX-NEXT: retq # sched: [7:1.00] 5351 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49> 5352 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5353 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5354 ret <64 x i8> %res 5355 } 5356 define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5357 ; GENERIC-LABEL: test_masked_64xi8_perm_mask2: 5358 ; GENERIC: # %bb.0: 5359 ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5360 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] 5361 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5362 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 5363 ; 5364 ; SKX-LABEL: test_masked_64xi8_perm_mask2: 5365 ; SKX: # %bb.0: 5366 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5367 ; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] 5368 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5369 ; SKX-NEXT: retq # sched: [7:1.00] 5370 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 5371 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5372 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5373 ret <64 x i8> %res 5374 } 5375 5376 define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { 5377 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: 5378 ; GENERIC: # %bb.0: 5379 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5380 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] 5381 ; GENERIC-NEXT: retq # sched: [1:1.00] 5382 ; 5383 ; SKX-LABEL: test_masked_z_64xi8_perm_mask2: 5384 ; SKX: # %bb.0: 5385 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5386 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] 5387 ; SKX-NEXT: retq # sched: [7:1.00] 5388 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60> 5389 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5390 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5391 ret <64 x i8> %res 5392 } 5393 define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { 5394 ; GENERIC-LABEL: test_64xi8_perm_mask3: 5395 ; GENERIC: # %bb.0: 5396 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] 5397 ; GENERIC-NEXT: retq # sched: [1:1.00] 5398 ; 5399 ; SKX-LABEL: test_64xi8_perm_mask3: 5400 ; SKX: # %bb.0: 5401 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] 5402 ; SKX-NEXT: retq # sched: [7:1.00] 5403 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, 
i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 5404 ret <64 x i8> %res 5405 } 5406 define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { 5407 ; GENERIC-LABEL: test_masked_64xi8_perm_mask3: 5408 ; GENERIC: # %bb.0: 5409 ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] 5410 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] 5411 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 5412 ; GENERIC-NEXT: retq # sched: [1:1.00] 5413 ; 5414 ; SKX-LABEL: test_masked_64xi8_perm_mask3: 5415 ; SKX: # %bb.0: 5416 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] 5417 ; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] 5418 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 5419 ; SKX-NEXT: retq # sched: [7:1.00] 5420 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 
48, i32 56, i32 49, i32 61> 5421 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5422 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5423 ret <64 x i8> %res 5424 } 5425 5426 define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) { 5427 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: 5428 ; GENERIC: # %bb.0: 5429 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5430 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] 5431 ; GENERIC-NEXT: retq # sched: [1:1.00] 5432 ; 5433 ; SKX-LABEL: test_masked_z_64xi8_perm_mask3: 5434 ; SKX: # %bb.0: 5435 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5436 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] 5437 ; SKX-NEXT: retq # sched: [7:1.00] 5438 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61> 5439 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5440 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5441 ret <64 x i8> %res 5442 } 5443 define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) { 5444 ; GENERIC-LABEL: 
test_64xi8_perm_mem_mask0: 5445 ; GENERIC: # %bb.0: 5446 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] 5447 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] 5448 ; GENERIC-NEXT: retq # sched: [1:1.00] 5449 ; 5450 ; SKX-LABEL: test_64xi8_perm_mem_mask0: 5451 ; SKX: # %bb.0: 5452 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] 5453 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] 5454 ; SKX-NEXT: retq # sched: [7:1.00] 5455 %vec = load <64 x i8>, <64 x i8>* %vp 5456 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 5457 ret <64 x i8> %res 5458 } 5459 define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5460 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0: 5461 ; GENERIC: # %bb.0: 5462 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5463 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5464 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = 
zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] 5465 ; GENERIC-NEXT: retq # sched: [1:1.00] 5466 ; 5467 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask0: 5468 ; SKX: # %bb.0: 5469 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5470 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5471 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] 5472 ; SKX-NEXT: retq # sched: [7:1.00] 5473 %vec = load <64 x i8>, <64 x i8>* %vp 5474 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 5475 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5476 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5477 ret <64 x i8> %res 5478 } 5479 5480 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %mask) { 5481 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0: 5482 ; GENERIC: # %bb.0: 5483 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5484 ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5485 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] 5486 ; GENERIC-NEXT: retq # sched: [1:1.00] 5487 ; 5488 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0: 5489 ; SKX: # %bb.0: 5490 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5491 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5492 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] 5493 ; SKX-NEXT: retq # sched: [7:1.00] 5494 %vec = load <64 x i8>, <64 x i8>* %vp 5495 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58> 5496 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5497 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5498 ret <64 x i8> %res 5499 } 5500 5501 define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5502 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1: 5503 ; GENERIC: # %bb.0: 5504 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5505 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5506 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = 
zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] 5507 ; GENERIC-NEXT: retq # sched: [1:1.00] 5508 ; 5509 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask1: 5510 ; SKX: # %bb.0: 5511 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5512 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5513 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] 5514 ; SKX-NEXT: retq # sched: [7:1.00] 5515 %vec = load <64 x i8>, <64 x i8>* %vp 5516 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49> 5517 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5518 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5519 ret <64 x i8> %res 5520 } 5521 5522 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %mask) { 5523 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1: 5524 ; GENERIC: # %bb.0: 5525 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5526 ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5527 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] 5528 ; GENERIC-NEXT: retq # sched: [1:1.00] 5529 ; 5530 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1: 5531 ; SKX: # %bb.0: 5532 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5533 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5534 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] 5535 ; SKX-NEXT: retq # sched: [7:1.00] 5536 %vec = load <64 x i8>, <64 x i8>* %vp 5537 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49> 5538 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5539 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5540 ret <64 x i8> %res 5541 } 5542 5543 define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5544 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2: 5545 ; GENERIC: # %bb.0: 5546 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5547 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5548 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = 
zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] 5549 ; GENERIC-NEXT: retq # sched: [1:1.00] 5550 ; 5551 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask2: 5552 ; SKX: # %bb.0: 5553 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5554 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5555 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] 5556 ; SKX-NEXT: retq # sched: [7:1.00] 5557 %vec = load <64 x i8>, <64 x i8>* %vp 5558 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61> 5559 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5560 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5561 ret <64 x i8> %res 5562 } 5563 5564 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %mask) { 5565 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2: 5566 ; GENERIC: # %bb.0: 5567 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5568 ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5569 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] 5570 ; GENERIC-NEXT: retq # sched: [1:1.00] 5571 ; 5572 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2: 5573 ; SKX: # %bb.0: 5574 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5575 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5576 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] 5577 ; SKX-NEXT: retq # sched: [7:1.00] 5578 %vec = load <64 x i8>, <64 x i8>* %vp 5579 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61> 5580 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5581 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5582 ret <64 x i8> %res 5583 } 5584 5585 define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) { 5586 ; GENERIC-LABEL: test_64xi8_perm_mem_mask3: 5587 ; GENERIC: # %bb.0: 5588 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] 5589 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] 5590 
; GENERIC-NEXT: retq # sched: [1:1.00] 5591 ; 5592 ; SKX-LABEL: test_64xi8_perm_mem_mask3: 5593 ; SKX: # %bb.0: 5594 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] 5595 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] 5596 ; SKX-NEXT: retq # sched: [7:1.00] 5597 %vec = load <64 x i8>, <64 x i8>* %vp 5598 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 5599 ret <64 x i8> %res 5600 } 5601 define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { 5602 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3: 5603 ; GENERIC: # %bb.0: 5604 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] 5605 ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] 5606 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] 5607 ; GENERIC-NEXT: retq # sched: [1:1.00] 5608 ; 5609 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask3: 5610 ; SKX: # %bb.0: 5611 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] 5612 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] 5613 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = 
zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] 5614 ; SKX-NEXT: retq # sched: [7:1.00] 5615 %vec = load <64 x i8>, <64 x i8>* %vp 5616 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 5617 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5618 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 5619 ret <64 x i8> %res 5620 } 5621 5622 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %mask) { 5623 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3: 5624 ; GENERIC: # %bb.0: 5625 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] 5626 ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] 5627 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] 5628 ; GENERIC-NEXT: retq # sched: [1:1.00] 5629 ; 5630 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3: 5631 ; SKX: # %bb.0: 5632 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] 5633 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] 5634 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] 5635 ; SKX-NEXT: retq # sched: [7:1.00] 5636 %vec = load <64 x i8>, <64 x i8>* %vp 5637 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60> 5638 %cmp = icmp eq <64 x i8> %mask, zeroinitializer 5639 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer 5640 ret <64 x i8> %res 5641 } 5642 5643 define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) { 5644 ; GENERIC-LABEL: test_8xi16_perm_high_mask0: 5645 ; GENERIC: # %bb.0: 5646 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] 5647 ; GENERIC-NEXT: retq # sched: [1:1.00] 5648 ; 5649 ; SKX-LABEL: test_8xi16_perm_high_mask0: 5650 ; SKX: # %bb.0: 5651 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] 5652 ; SKX-NEXT: retq # sched: [7:1.00] 5653 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 5654 ret <8 x i16> %res 5655 } 5656 define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5657 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0: 5658 ; GENERIC: # %bb.0: 5659 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5660 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: 
[1:0.50] 5661 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5662 ; GENERIC-NEXT: retq # sched: [1:1.00] 5663 ; 5664 ; SKX-LABEL: test_masked_8xi16_perm_high_mask0: 5665 ; SKX: # %bb.0: 5666 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5667 ; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] 5668 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5669 ; SKX-NEXT: retq # sched: [7:1.00] 5670 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 5671 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5672 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5673 ret <8 x i16> %res 5674 } 5675 5676 define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) { 5677 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0: 5678 ; GENERIC: # %bb.0: 5679 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5680 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] 5681 ; GENERIC-NEXT: retq # sched: [1:1.00] 5682 ; 5683 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask0: 5684 ; SKX: # %bb.0: 5685 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5686 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] 5687 ; SKX-NEXT: retq # sched: [7:1.00] 5688 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6> 5689 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5690 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5691 ret <8 x i16> %res 5692 } 5693 define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5694 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1: 5695 ; GENERIC: # %bb.0: 5696 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5697 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] 
sched: [1:0.50] 5698 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5699 ; GENERIC-NEXT: retq # sched: [1:1.00] 5700 ; 5701 ; SKX-LABEL: test_masked_8xi16_perm_low_mask1: 5702 ; SKX: # %bb.0: 5703 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5704 ; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] 5705 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5706 ; SKX-NEXT: retq # sched: [7:1.00] 5707 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 5708 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5709 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5710 ret <8 x i16> %res 5711 } 5712 5713 define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) { 5714 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1: 5715 ; GENERIC: # %bb.0: 5716 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5717 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] 5718 ; GENERIC-NEXT: retq # sched: [1:1.00] 5719 ; 5720 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask1: 5721 ; SKX: # %bb.0: 5722 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5723 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] 5724 ; SKX-NEXT: retq # sched: [7:1.00] 5725 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 5726 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5727 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5728 ret <8 x i16> %res 5729 } 5730 define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5731 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2: 5732 ; GENERIC: # %bb.0: 5733 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5734 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = 
xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] 5735 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5736 ; GENERIC-NEXT: retq # sched: [1:1.00] 5737 ; 5738 ; SKX-LABEL: test_masked_8xi16_perm_high_mask2: 5739 ; SKX: # %bb.0: 5740 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5741 ; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] 5742 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5743 ; SKX-NEXT: retq # sched: [7:1.00] 5744 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 5745 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5746 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5747 ret <8 x i16> %res 5748 } 5749 5750 define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { 5751 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2: 5752 ; GENERIC: # %bb.0: 5753 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5754 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] 5755 ; GENERIC-NEXT: retq # sched: [1:1.00] 5756 ; 5757 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask2: 5758 ; SKX: # %bb.0: 5759 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5760 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] 5761 ; SKX-NEXT: retq # sched: [7:1.00] 5762 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5> 5763 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5764 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5765 ret <8 x i16> %res 5766 } 5767 define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { 5768 ; GENERIC-LABEL: test_8xi16_perm_low_mask3: 5769 ; GENERIC: # %bb.0: 5770 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] 5771 ; GENERIC-NEXT: retq # sched: [1:1.00] 5772 ; 5773 ; SKX-LABEL: 
test_8xi16_perm_low_mask3: 5774 ; SKX: # %bb.0: 5775 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] 5776 ; SKX-NEXT: retq # sched: [7:1.00] 5777 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 5778 ret <8 x i16> %res 5779 } 5780 define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5781 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3: 5782 ; GENERIC: # %bb.0: 5783 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5784 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] 5785 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5786 ; GENERIC-NEXT: retq # sched: [1:1.00] 5787 ; 5788 ; SKX-LABEL: test_masked_8xi16_perm_low_mask3: 5789 ; SKX: # %bb.0: 5790 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5791 ; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] 5792 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5793 ; SKX-NEXT: retq # sched: [7:1.00] 5794 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 5795 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5796 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5797 ret <8 x i16> %res 5798 } 5799 5800 define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { 5801 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3: 5802 ; GENERIC: # %bb.0: 5803 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5804 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] 5805 ; GENERIC-NEXT: retq # sched: [1:1.00] 5806 ; 5807 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask3: 5808 ; SKX: # %bb.0: 5809 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5810 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] 
5811 ; SKX-NEXT: retq # sched: [7:1.00] 5812 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7> 5813 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5814 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5815 ret <8 x i16> %res 5816 } 5817 define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5818 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4: 5819 ; GENERIC: # %bb.0: 5820 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5821 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] 5822 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5823 ; GENERIC-NEXT: retq # sched: [1:1.00] 5824 ; 5825 ; SKX-LABEL: test_masked_8xi16_perm_high_mask4: 5826 ; SKX: # %bb.0: 5827 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5828 ; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] 5829 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5830 ; SKX-NEXT: retq # sched: [7:1.00] 5831 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 5832 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5833 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5834 ret <8 x i16> %res 5835 } 5836 5837 define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { 5838 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4: 5839 ; GENERIC: # %bb.0: 5840 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5841 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] 5842 ; GENERIC-NEXT: retq # sched: [1:1.00] 5843 ; 5844 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask4: 5845 ; SKX: # %bb.0: 5846 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5847 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: 
[1:1.00] 5848 ; SKX-NEXT: retq # sched: [7:1.00] 5849 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6> 5850 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5851 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5852 ret <8 x i16> %res 5853 } 5854 define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5855 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5: 5856 ; GENERIC: # %bb.0: 5857 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5858 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] 5859 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5860 ; GENERIC-NEXT: retq # sched: [1:1.00] 5861 ; 5862 ; SKX-LABEL: test_masked_8xi16_perm_low_mask5: 5863 ; SKX: # %bb.0: 5864 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5865 ; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] 5866 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5867 ; SKX-NEXT: retq # sched: [7:1.00] 5868 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 5869 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5870 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5871 ret <8 x i16> %res 5872 } 5873 5874 define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { 5875 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5: 5876 ; GENERIC: # %bb.0: 5877 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5878 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] 5879 ; GENERIC-NEXT: retq # sched: [1:1.00] 5880 ; 5881 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask5: 5882 ; SKX: # %bb.0: 5883 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5884 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] 
sched: [1:1.00] 5885 ; SKX-NEXT: retq # sched: [7:1.00] 5886 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7> 5887 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5888 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5889 ret <8 x i16> %res 5890 } 5891 define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) { 5892 ; GENERIC-LABEL: test_8xi16_perm_high_mask6: 5893 ; GENERIC: # %bb.0: 5894 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] 5895 ; GENERIC-NEXT: retq # sched: [1:1.00] 5896 ; 5897 ; SKX-LABEL: test_8xi16_perm_high_mask6: 5898 ; SKX: # %bb.0: 5899 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] 5900 ; SKX-NEXT: retq # sched: [7:1.00] 5901 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 5902 ret <8 x i16> %res 5903 } 5904 define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5905 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6: 5906 ; GENERIC: # %bb.0: 5907 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5908 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] 5909 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5910 ; GENERIC-NEXT: retq # sched: [1:1.00] 5911 ; 5912 ; SKX-LABEL: test_masked_8xi16_perm_high_mask6: 5913 ; SKX: # %bb.0: 5914 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5915 ; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] 5916 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5917 ; SKX-NEXT: retq # sched: [7:1.00] 5918 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 5919 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5920 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 5921 ret 
<8 x i16> %res 5922 } 5923 5924 define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { 5925 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6: 5926 ; GENERIC: # %bb.0: 5927 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5928 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] 5929 ; GENERIC-NEXT: retq # sched: [1:1.00] 5930 ; 5931 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask6: 5932 ; SKX: # %bb.0: 5933 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5934 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] 5935 ; SKX-NEXT: retq # sched: [7:1.00] 5936 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5> 5937 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5938 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5939 ret <8 x i16> %res 5940 } 5941 define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { 5942 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7: 5943 ; GENERIC: # %bb.0: 5944 ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] 5945 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] 5946 ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5947 ; GENERIC-NEXT: retq # sched: [1:1.00] 5948 ; 5949 ; SKX-LABEL: test_masked_8xi16_perm_low_mask7: 5950 ; SKX: # %bb.0: 5951 ; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] 5952 ; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] 5953 ; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] 5954 ; SKX-NEXT: retq # sched: [7:1.00] 5955 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 5956 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5957 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 
5958 ret <8 x i16> %res 5959 } 5960 5961 define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { 5962 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7: 5963 ; GENERIC: # %bb.0: 5964 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5965 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] 5966 ; GENERIC-NEXT: retq # sched: [1:1.00] 5967 ; 5968 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask7: 5969 ; SKX: # %bb.0: 5970 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 5971 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] 5972 ; SKX-NEXT: retq # sched: [7:1.00] 5973 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 5974 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 5975 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 5976 ret <8 x i16> %res 5977 } 5978 define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { 5979 ; GENERIC-LABEL: test_8xi16_perm_high_mem_mask0: 5980 ; GENERIC: # %bb.0: 5981 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] 5982 ; GENERIC-NEXT: retq # sched: [1:1.00] 5983 ; 5984 ; SKX-LABEL: test_8xi16_perm_high_mem_mask0: 5985 ; SKX: # %bb.0: 5986 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] 5987 ; SKX-NEXT: retq # sched: [7:1.00] 5988 %vec = load <8 x i16>, <8 x i16>* %vp 5989 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 5990 ret <8 x i16> %res 5991 } 5992 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 5993 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0: 5994 ; GENERIC: # %bb.0: 5995 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 5996 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] 
sched: [7:0.50] 5997 ; GENERIC-NEXT: retq # sched: [1:1.00] 5998 ; 5999 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0: 6000 ; SKX: # %bb.0: 6001 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6002 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] 6003 ; SKX-NEXT: retq # sched: [7:1.00] 6004 %vec = load <8 x i16>, <8 x i16>* %vp 6005 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 6006 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6007 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6008 ret <8 x i16> %res 6009 } 6010 6011 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { 6012 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: 6013 ; GENERIC: # %bb.0: 6014 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6015 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] 6016 ; GENERIC-NEXT: retq # sched: [1:1.00] 6017 ; 6018 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: 6019 ; SKX: # %bb.0: 6020 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6021 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] 6022 ; SKX-NEXT: retq # sched: [7:1.00] 6023 %vec = load <8 x i16>, <8 x i16>* %vp 6024 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6> 6025 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6026 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6027 ret <8 x i16> %res 6028 } 6029 6030 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6031 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1: 6032 ; GENERIC: # %bb.0: 6033 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6034 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = 
mem[1,3,3,2,4,5,6,7] sched: [7:0.50] 6035 ; GENERIC-NEXT: retq # sched: [1:1.00] 6036 ; 6037 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1: 6038 ; SKX: # %bb.0: 6039 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6040 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] 6041 ; SKX-NEXT: retq # sched: [7:1.00] 6042 %vec = load <8 x i16>, <8 x i16>* %vp 6043 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6044 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6045 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6046 ret <8 x i16> %res 6047 } 6048 6049 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { 6050 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: 6051 ; GENERIC: # %bb.0: 6052 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6053 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50] 6054 ; GENERIC-NEXT: retq # sched: [1:1.00] 6055 ; 6056 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: 6057 ; SKX: # %bb.0: 6058 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6059 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] 6060 ; SKX-NEXT: retq # sched: [7:1.00] 6061 %vec = load <8 x i16>, <8 x i16>* %vp 6062 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6063 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6064 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6065 ret <8 x i16> %res 6066 } 6067 6068 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6069 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2: 6070 ; GENERIC: # %bb.0: 6071 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6072 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = 
mem[0,1,2,3,6,6,5,7] sched: [7:0.50] 6073 ; GENERIC-NEXT: retq # sched: [1:1.00] 6074 ; 6075 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2: 6076 ; SKX: # %bb.0: 6077 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6078 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] 6079 ; SKX-NEXT: retq # sched: [7:1.00] 6080 %vec = load <8 x i16>, <8 x i16>* %vp 6081 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 6082 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6083 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6084 ret <8 x i16> %res 6085 } 6086 6087 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { 6088 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: 6089 ; GENERIC: # %bb.0: 6090 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6091 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50] 6092 ; GENERIC-NEXT: retq # sched: [1:1.00] 6093 ; 6094 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: 6095 ; SKX: # %bb.0: 6096 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6097 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] 6098 ; SKX-NEXT: retq # sched: [7:1.00] 6099 %vec = load <8 x i16>, <8 x i16>* %vp 6100 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7> 6101 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6102 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6103 ret <8 x i16> %res 6104 } 6105 6106 define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { 6107 ; GENERIC-LABEL: test_8xi16_perm_low_mem_mask3: 6108 ; GENERIC: # %bb.0: 6109 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] 6110 ; GENERIC-NEXT: retq # sched: [1:1.00] 6111 ; 6112 ; SKX-LABEL: 
test_8xi16_perm_low_mem_mask3: 6113 ; SKX: # %bb.0: 6114 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] 6115 ; SKX-NEXT: retq # sched: [7:1.00] 6116 %vec = load <8 x i16>, <8 x i16>* %vp 6117 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 6118 ret <8 x i16> %res 6119 } 6120 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6121 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3: 6122 ; GENERIC: # %bb.0: 6123 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6124 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] 6125 ; GENERIC-NEXT: retq # sched: [1:1.00] 6126 ; 6127 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3: 6128 ; SKX: # %bb.0: 6129 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6130 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] 6131 ; SKX-NEXT: retq # sched: [7:1.00] 6132 %vec = load <8 x i16>, <8 x i16>* %vp 6133 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 6134 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6135 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6136 ret <8 x i16> %res 6137 } 6138 6139 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { 6140 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: 6141 ; GENERIC: # %bb.0: 6142 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6143 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] 6144 ; GENERIC-NEXT: retq # sched: [1:1.00] 6145 ; 6146 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: 6147 ; SKX: # %bb.0: 6148 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6149 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] 6150 ; 
SKX-NEXT: retq # sched: [7:1.00] 6151 %vec = load <8 x i16>, <8 x i16>* %vp 6152 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7> 6153 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6154 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6155 ret <8 x i16> %res 6156 } 6157 6158 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6159 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4: 6160 ; GENERIC: # %bb.0: 6161 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6162 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] 6163 ; GENERIC-NEXT: retq # sched: [1:1.00] 6164 ; 6165 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4: 6166 ; SKX: # %bb.0: 6167 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6168 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] 6169 ; SKX-NEXT: retq # sched: [7:1.00] 6170 %vec = load <8 x i16>, <8 x i16>* %vp 6171 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 6172 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6173 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6174 ret <8 x i16> %res 6175 } 6176 6177 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { 6178 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: 6179 ; GENERIC: # %bb.0: 6180 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6181 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] 6182 ; GENERIC-NEXT: retq # sched: [1:1.00] 6183 ; 6184 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: 6185 ; SKX: # %bb.0: 6186 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6187 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] 
6188 ; SKX-NEXT: retq # sched: [7:1.00] 6189 %vec = load <8 x i16>, <8 x i16>* %vp 6190 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5> 6191 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6192 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6193 ret <8 x i16> %res 6194 } 6195 6196 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6197 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5: 6198 ; GENERIC: # %bb.0: 6199 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6200 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] 6201 ; GENERIC-NEXT: retq # sched: [1:1.00] 6202 ; 6203 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5: 6204 ; SKX: # %bb.0: 6205 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6206 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] 6207 ; SKX-NEXT: retq # sched: [7:1.00] 6208 %vec = load <8 x i16>, <8 x i16>* %vp 6209 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6210 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6211 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 6212 ret <8 x i16> %res 6213 } 6214 6215 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { 6216 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: 6217 ; GENERIC: # %bb.0: 6218 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6219 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] 6220 ; GENERIC-NEXT: retq # sched: [1:1.00] 6221 ; 6222 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: 6223 ; SKX: # %bb.0: 6224 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6225 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] 
6226 ; SKX-NEXT: retq # sched: [7:1.00] 6227 %vec = load <8 x i16>, <8 x i16>* %vp 6228 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 6229 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6230 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6231 ret <8 x i16> %res 6232 } 6233 6234 define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { 6235 ; GENERIC-LABEL: test_8xi16_perm_high_mem_mask6: 6236 ; GENERIC: # %bb.0: 6237 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] 6238 ; GENERIC-NEXT: retq # sched: [1:1.00] 6239 ; 6240 ; SKX-LABEL: test_8xi16_perm_high_mem_mask6: 6241 ; SKX: # %bb.0: 6242 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] 6243 ; SKX-NEXT: retq # sched: [7:1.00] 6244 %vec = load <8 x i16>, <8 x i16>* %vp 6245 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 6246 ret <8 x i16> %res 6247 } 6248 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6249 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6: 6250 ; GENERIC: # %bb.0: 6251 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6252 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] 6253 ; GENERIC-NEXT: retq # sched: [1:1.00] 6254 ; 6255 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6: 6256 ; SKX: # %bb.0: 6257 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6258 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] 6259 ; SKX-NEXT: retq # sched: [7:1.00] 6260 %vec = load <8 x i16>, <8 x i16>* %vp 6261 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 6262 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6263 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 
x i16> %vec2 6264 ret <8 x i16> %res 6265 } 6266 6267 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { 6268 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: 6269 ; GENERIC: # %bb.0: 6270 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6271 ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] 6272 ; GENERIC-NEXT: retq # sched: [1:1.00] 6273 ; 6274 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: 6275 ; SKX: # %bb.0: 6276 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6277 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] 6278 ; SKX-NEXT: retq # sched: [7:1.00] 6279 %vec = load <8 x i16>, <8 x i16>* %vp 6280 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4> 6281 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6282 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6283 ret <8 x i16> %res 6284 } 6285 6286 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { 6287 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7: 6288 ; GENERIC: # %bb.0: 6289 ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] 6290 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] 6291 ; GENERIC-NEXT: retq # sched: [1:1.00] 6292 ; 6293 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7: 6294 ; SKX: # %bb.0: 6295 ; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] 6296 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] 6297 ; SKX-NEXT: retq # sched: [7:1.00] 6298 %vec = load <8 x i16>, <8 x i16>* %vp 6299 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 6300 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6301 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 
x i16> %vec2 6302 ret <8 x i16> %res 6303 } 6304 6305 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { 6306 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: 6307 ; GENERIC: # %bb.0: 6308 ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] 6309 ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] 6310 ; GENERIC-NEXT: retq # sched: [1:1.00] 6311 ; 6312 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: 6313 ; SKX: # %bb.0: 6314 ; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] 6315 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] 6316 ; SKX-NEXT: retq # sched: [7:1.00] 6317 %vec = load <8 x i16>, <8 x i16>* %vp 6318 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7> 6319 %cmp = icmp eq <8 x i16> %mask, zeroinitializer 6320 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer 6321 ret <8 x i16> %res 6322 } 6323 6324 define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) { 6325 ; GENERIC-LABEL: test_16xi16_perm_high_mask0: 6326 ; GENERIC: # %bb.0: 6327 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6328 ; GENERIC-NEXT: retq # sched: [1:1.00] 6329 ; 6330 ; SKX-LABEL: test_16xi16_perm_high_mask0: 6331 ; SKX: # %bb.0: 6332 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6333 ; SKX-NEXT: retq # sched: [7:1.00] 6334 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 6335 ret <16 x i16> %res 6336 } 6337 define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6338 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0: 6339 ; GENERIC: # %bb.0: 6340 ; 
GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6341 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6342 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6343 ; GENERIC-NEXT: retq # sched: [1:1.00] 6344 ; 6345 ; SKX-LABEL: test_masked_16xi16_perm_high_mask0: 6346 ; SKX: # %bb.0: 6347 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6348 ; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6349 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6350 ; SKX-NEXT: retq # sched: [7:1.00] 6351 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 6352 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6353 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6354 ret <16 x i16> %res 6355 } 6356 6357 define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { 6358 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0: 6359 ; GENERIC: # %bb.0: 6360 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6361 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6362 ; GENERIC-NEXT: retq # sched: [1:1.00] 6363 ; 6364 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask0: 6365 ; SKX: # %bb.0: 6366 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6367 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] 6368 ; SKX-NEXT: retq # sched: [7:1.00] 6369 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12> 6370 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6371 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> 
zeroinitializer 6372 ret <16 x i16> %res 6373 } 6374 define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6375 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1: 6376 ; GENERIC: # %bb.0: 6377 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6378 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6379 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6380 ; GENERIC-NEXT: retq # sched: [1:1.00] 6381 ; 6382 ; SKX-LABEL: test_masked_16xi16_perm_low_mask1: 6383 ; SKX: # %bb.0: 6384 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6385 ; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6386 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6387 ; SKX-NEXT: retq # sched: [7:1.00] 6388 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6389 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6390 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6391 ret <16 x i16> %res 6392 } 6393 6394 define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { 6395 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1: 6396 ; GENERIC: # %bb.0: 6397 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6398 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6399 ; GENERIC-NEXT: retq # sched: [1:1.00] 6400 ; 6401 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask1: 6402 ; SKX: # %bb.0: 6403 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6404 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] 6405 ; SKX-NEXT: retq # sched: [7:1.00] 6406 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, 
<16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6407 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6408 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6409 ret <16 x i16> %res 6410 } 6411 define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6412 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2: 6413 ; GENERIC: # %bb.0: 6414 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6415 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6416 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6417 ; GENERIC-NEXT: retq # sched: [1:1.00] 6418 ; 6419 ; SKX-LABEL: test_masked_16xi16_perm_high_mask2: 6420 ; SKX: # %bb.0: 6421 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6422 ; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6423 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6424 ; SKX-NEXT: retq # sched: [7:1.00] 6425 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 6426 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6427 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6428 ret <16 x i16> %res 6429 } 6430 6431 define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { 6432 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2: 6433 ; GENERIC: # %bb.0: 6434 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6435 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6436 ; GENERIC-NEXT: retq # sched: [1:1.00] 6437 ; 6438 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask2: 6439 ; SKX: # %bb.0: 6440 ; SKX-NEXT: vptestnmw 
%ymm1, %ymm1, %k1 # sched: [3:1.00] 6441 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] 6442 ; SKX-NEXT: retq # sched: [7:1.00] 6443 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13> 6444 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6445 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6446 ret <16 x i16> %res 6447 } 6448 define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { 6449 ; GENERIC-LABEL: test_16xi16_perm_low_mask3: 6450 ; GENERIC: # %bb.0: 6451 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6452 ; GENERIC-NEXT: retq # sched: [1:1.00] 6453 ; 6454 ; SKX-LABEL: test_16xi16_perm_low_mask3: 6455 ; SKX: # %bb.0: 6456 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6457 ; SKX-NEXT: retq # sched: [7:1.00] 6458 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6459 ret <16 x i16> %res 6460 } 6461 define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6462 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3: 6463 ; GENERIC: # %bb.0: 6464 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6465 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6466 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6467 ; GENERIC-NEXT: retq # sched: [1:1.00] 6468 ; 6469 ; SKX-LABEL: test_masked_16xi16_perm_low_mask3: 6470 ; SKX: # %bb.0: 6471 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6472 ; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = 
ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6473 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6474 ; SKX-NEXT: retq # sched: [7:1.00] 6475 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6476 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6477 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6478 ret <16 x i16> %res 6479 } 6480 6481 define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { 6482 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3: 6483 ; GENERIC: # %bb.0: 6484 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6485 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6486 ; GENERIC-NEXT: retq # sched: [1:1.00] 6487 ; 6488 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask3: 6489 ; SKX: # %bb.0: 6490 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6491 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] 6492 ; SKX-NEXT: retq # sched: [7:1.00] 6493 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6494 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6495 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6496 ret <16 x i16> %res 6497 } 6498 define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6499 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4: 6500 ; GENERIC: # %bb.0: 6501 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6502 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6503 ; GENERIC-NEXT: vmovdqa %ymm1, 
%ymm0 # sched: [1:0.50] 6504 ; GENERIC-NEXT: retq # sched: [1:1.00] 6505 ; 6506 ; SKX-LABEL: test_masked_16xi16_perm_high_mask4: 6507 ; SKX: # %bb.0: 6508 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6509 ; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6510 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6511 ; SKX-NEXT: retq # sched: [7:1.00] 6512 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 6513 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6514 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6515 ret <16 x i16> %res 6516 } 6517 6518 define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { 6519 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4: 6520 ; GENERIC: # %bb.0: 6521 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6522 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6523 ; GENERIC-NEXT: retq # sched: [1:1.00] 6524 ; 6525 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask4: 6526 ; SKX: # %bb.0: 6527 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6528 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] 6529 ; SKX-NEXT: retq # sched: [7:1.00] 6530 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15> 6531 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6532 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6533 ret <16 x i16> %res 6534 } 6535 define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6536 ; GENERIC-LABEL: 
test_masked_16xi16_perm_low_mask5: 6537 ; GENERIC: # %bb.0: 6538 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6539 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6540 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6541 ; GENERIC-NEXT: retq # sched: [1:1.00] 6542 ; 6543 ; SKX-LABEL: test_masked_16xi16_perm_low_mask5: 6544 ; SKX: # %bb.0: 6545 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6546 ; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6547 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6548 ; SKX-NEXT: retq # sched: [7:1.00] 6549 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6550 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6551 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6552 ret <16 x i16> %res 6553 } 6554 6555 define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { 6556 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5: 6557 ; GENERIC: # %bb.0: 6558 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6559 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6560 ; GENERIC-NEXT: retq # sched: [1:1.00] 6561 ; 6562 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask5: 6563 ; SKX: # %bb.0: 6564 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6565 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] 6566 ; SKX-NEXT: retq # sched: [7:1.00] 6567 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6568 %cmp = icmp eq <16 x i16> %mask, 
zeroinitializer 6569 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6570 ret <16 x i16> %res 6571 } 6572 define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { 6573 ; GENERIC-LABEL: test_16xi16_perm_high_mask6: 6574 ; GENERIC: # %bb.0: 6575 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6576 ; GENERIC-NEXT: retq # sched: [1:1.00] 6577 ; 6578 ; SKX-LABEL: test_16xi16_perm_high_mask6: 6579 ; SKX: # %bb.0: 6580 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6581 ; SKX-NEXT: retq # sched: [7:1.00] 6582 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 6583 ret <16 x i16> %res 6584 } 6585 define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6586 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6: 6587 ; GENERIC: # %bb.0: 6588 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6589 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6590 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6591 ; GENERIC-NEXT: retq # sched: [1:1.00] 6592 ; 6593 ; SKX-LABEL: test_masked_16xi16_perm_high_mask6: 6594 ; SKX: # %bb.0: 6595 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6596 ; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6597 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6598 ; SKX-NEXT: retq # sched: [7:1.00] 6599 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 6600 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6601 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x 
i16> %vec2 6602 ret <16 x i16> %res 6603 } 6604 6605 define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { 6606 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6: 6607 ; GENERIC: # %bb.0: 6608 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6609 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6610 ; GENERIC-NEXT: retq # sched: [1:1.00] 6611 ; 6612 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask6: 6613 ; SKX: # %bb.0: 6614 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6615 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] 6616 ; SKX-NEXT: retq # sched: [7:1.00] 6617 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13> 6618 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6619 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6620 ret <16 x i16> %res 6621 } 6622 define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { 6623 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7: 6624 ; GENERIC: # %bb.0: 6625 ; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] 6626 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6627 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 6628 ; GENERIC-NEXT: retq # sched: [1:1.00] 6629 ; 6630 ; SKX-LABEL: test_masked_16xi16_perm_low_mask7: 6631 ; SKX: # %bb.0: 6632 ; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] 6633 ; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6634 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 6635 ; SKX-NEXT: retq # sched: [7:1.00] 6636 %shuf = shufflevector <16 x i16> %vec, <16 x i16> 
undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 6637 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6638 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6639 ret <16 x i16> %res 6640 } 6641 6642 define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { 6643 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7: 6644 ; GENERIC: # %bb.0: 6645 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6646 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6647 ; GENERIC-NEXT: retq # sched: [1:1.00] 6648 ; 6649 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask7: 6650 ; SKX: # %bb.0: 6651 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6652 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] 6653 ; SKX-NEXT: retq # sched: [7:1.00] 6654 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15> 6655 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6656 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6657 ret <16 x i16> %res 6658 } 6659 define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { 6660 ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask0: 6661 ; GENERIC: # %bb.0: 6662 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6663 ; GENERIC-NEXT: retq # sched: [1:1.00] 6664 ; 6665 ; SKX-LABEL: test_16xi16_perm_high_mem_mask0: 6666 ; SKX: # %bb.0: 6667 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6668 ; SKX-NEXT: retq # sched: [7:1.00] 6669 %vec = load <16 x i16>, <16 x i16>* %vp 6670 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 
x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 6671 ret <16 x i16> %res 6672 } 6673 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6674 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0: 6675 ; GENERIC: # %bb.0: 6676 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6677 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6678 ; GENERIC-NEXT: retq # sched: [1:1.00] 6679 ; 6680 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask0: 6681 ; SKX: # %bb.0: 6682 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6683 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6684 ; SKX-NEXT: retq # sched: [7:1.00] 6685 %vec = load <16 x i16>, <16 x i16>* %vp 6686 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 6687 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6688 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6689 ret <16 x i16> %res 6690 } 6691 6692 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { 6693 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: 6694 ; GENERIC: # %bb.0: 6695 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6696 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6697 ; GENERIC-NEXT: retq # sched: [1:1.00] 6698 ; 6699 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: 6700 ; SKX: # %bb.0: 6701 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6702 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] 6703 ; SKX-NEXT: retq # sched: [7:1.00] 
6704 %vec = load <16 x i16>, <16 x i16>* %vp 6705 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15> 6706 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6707 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6708 ret <16 x i16> %res 6709 } 6710 6711 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6712 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1: 6713 ; GENERIC: # %bb.0: 6714 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6715 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6716 ; GENERIC-NEXT: retq # sched: [1:1.00] 6717 ; 6718 ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask1: 6719 ; SKX: # %bb.0: 6720 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6721 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6722 ; SKX-NEXT: retq # sched: [7:1.00] 6723 %vec = load <16 x i16>, <16 x i16>* %vp 6724 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6725 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6726 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6727 ret <16 x i16> %res 6728 } 6729 6730 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { 6731 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: 6732 ; GENERIC: # %bb.0: 6733 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6734 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6735 ; GENERIC-NEXT: retq # sched: [1:1.00] 6736 ; 6737 ; SKX-LABEL: 
test_masked_z_16xi16_perm_low_mem_mask1: 6738 ; SKX: # %bb.0: 6739 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6740 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] 6741 ; SKX-NEXT: retq # sched: [7:1.00] 6742 %vec = load <16 x i16>, <16 x i16>* %vp 6743 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6744 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6745 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6746 ret <16 x i16> %res 6747 } 6748 6749 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6750 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2: 6751 ; GENERIC: # %bb.0: 6752 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6753 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6754 ; GENERIC-NEXT: retq # sched: [1:1.00] 6755 ; 6756 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask2: 6757 ; SKX: # %bb.0: 6758 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6759 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6760 ; SKX-NEXT: retq # sched: [7:1.00] 6761 %vec = load <16 x i16>, <16 x i16>* %vp 6762 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 6763 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6764 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6765 ret <16 x i16> %res 6766 } 6767 6768 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { 6769 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: 6770 ; GENERIC: # %bb.0: 
6771 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6772 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6773 ; GENERIC-NEXT: retq # sched: [1:1.00] 6774 ; 6775 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: 6776 ; SKX: # %bb.0: 6777 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6778 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] 6779 ; SKX-NEXT: retq # sched: [7:1.00] 6780 %vec = load <16 x i16>, <16 x i16>* %vp 6781 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14> 6782 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6783 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6784 ret <16 x i16> %res 6785 } 6786 6787 define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { 6788 ; GENERIC-LABEL: test_16xi16_perm_low_mem_mask3: 6789 ; GENERIC: # %bb.0: 6790 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6791 ; GENERIC-NEXT: retq # sched: [1:1.00] 6792 ; 6793 ; SKX-LABEL: test_16xi16_perm_low_mem_mask3: 6794 ; SKX: # %bb.0: 6795 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6796 ; SKX-NEXT: retq # sched: [7:1.00] 6797 %vec = load <16 x i16>, <16 x i16>* %vp 6798 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6799 ret <16 x i16> %res 6800 } 6801 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6802 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3: 6803 ; GENERIC: # %bb.0: 6804 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 
6805 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6806 ; GENERIC-NEXT: retq # sched: [1:1.00] 6807 ; 6808 ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask3: 6809 ; SKX: # %bb.0: 6810 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6811 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6812 ; SKX-NEXT: retq # sched: [7:1.00] 6813 %vec = load <16 x i16>, <16 x i16>* %vp 6814 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6815 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6816 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6817 ret <16 x i16> %res 6818 } 6819 6820 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { 6821 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: 6822 ; GENERIC: # %bb.0: 6823 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6824 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6825 ; GENERIC-NEXT: retq # sched: [1:1.00] 6826 ; 6827 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: 6828 ; SKX: # %bb.0: 6829 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6830 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] 6831 ; SKX-NEXT: retq # sched: [7:1.00] 6832 %vec = load <16 x i16>, <16 x i16>* %vp 6833 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15> 6834 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6835 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6836 ret <16 x i16> %res 6837 } 6838 6839 define <16 x 
i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6840 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4: 6841 ; GENERIC: # %bb.0: 6842 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6843 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6844 ; GENERIC-NEXT: retq # sched: [1:1.00] 6845 ; 6846 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask4: 6847 ; SKX: # %bb.0: 6848 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6849 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6850 ; SKX-NEXT: retq # sched: [7:1.00] 6851 %vec = load <16 x i16>, <16 x i16>* %vp 6852 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 6853 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6854 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6855 ret <16 x i16> %res 6856 } 6857 6858 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) { 6859 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: 6860 ; GENERIC: # %bb.0: 6861 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6862 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6863 ; GENERIC-NEXT: retq # sched: [1:1.00] 6864 ; 6865 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: 6866 ; SKX: # %bb.0: 6867 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6868 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] 6869 ; SKX-NEXT: retq # sched: [7:1.00] 6870 %vec = load <16 x i16>, <16 x i16>* %vp 6871 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, 
i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15> 6872 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6873 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6874 ret <16 x i16> %res 6875 } 6876 6877 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6878 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5: 6879 ; GENERIC: # %bb.0: 6880 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6881 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6882 ; GENERIC-NEXT: retq # sched: [1:1.00] 6883 ; 6884 ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask5: 6885 ; SKX: # %bb.0: 6886 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6887 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6888 ; SKX-NEXT: retq # sched: [7:1.00] 6889 %vec = load <16 x i16>, <16 x i16>* %vp 6890 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6891 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6892 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6893 ret <16 x i16> %res 6894 } 6895 6896 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) { 6897 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: 6898 ; GENERIC: # %bb.0: 6899 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6900 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6901 ; GENERIC-NEXT: retq # sched: [1:1.00] 6902 ; 6903 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: 6904 ; SKX: # %bb.0: 6905 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6906 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = 
mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] 6907 ; SKX-NEXT: retq # sched: [7:1.00] 6908 %vec = load <16 x i16>, <16 x i16>* %vp 6909 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6910 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6911 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6912 ret <16 x i16> %res 6913 } 6914 6915 define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { 6916 ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask6: 6917 ; GENERIC: # %bb.0: 6918 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6919 ; GENERIC-NEXT: retq # sched: [1:1.00] 6920 ; 6921 ; SKX-LABEL: test_16xi16_perm_high_mem_mask6: 6922 ; SKX: # %bb.0: 6923 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6924 ; SKX-NEXT: retq # sched: [7:1.00] 6925 %vec = load <16 x i16>, <16 x i16>* %vp 6926 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 6927 ret <16 x i16> %res 6928 } 6929 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6930 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6: 6931 ; GENERIC: # %bb.0: 6932 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6933 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6934 ; GENERIC-NEXT: retq # sched: [1:1.00] 6935 ; 6936 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask6: 6937 ; SKX: # %bb.0: 6938 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6939 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6940 ; 
SKX-NEXT: retq # sched: [7:1.00] 6941 %vec = load <16 x i16>, <16 x i16>* %vp 6942 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 6943 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6944 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6945 ret <16 x i16> %res 6946 } 6947 6948 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) { 6949 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: 6950 ; GENERIC: # %bb.0: 6951 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6952 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6953 ; GENERIC-NEXT: retq # sched: [1:1.00] 6954 ; 6955 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: 6956 ; SKX: # %bb.0: 6957 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6958 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] 6959 ; SKX-NEXT: retq # sched: [7:1.00] 6960 %vec = load <16 x i16>, <16 x i16>* %vp 6961 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13> 6962 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6963 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 6964 ret <16 x i16> %res 6965 } 6966 6967 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { 6968 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7: 6969 ; GENERIC: # %bb.0: 6970 ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] 6971 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6972 ; GENERIC-NEXT: retq # sched: [1:1.00] 6973 ; 6974 ; 
SKX-LABEL: test_masked_16xi16_perm_low_mem_mask7: 6975 ; SKX: # %bb.0: 6976 ; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] 6977 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6978 ; SKX-NEXT: retq # sched: [7:1.00] 6979 %vec = load <16 x i16>, <16 x i16>* %vp 6980 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 6981 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 6982 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 6983 ret <16 x i16> %res 6984 } 6985 6986 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) { 6987 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: 6988 ; GENERIC: # %bb.0: 6989 ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] 6990 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6991 ; GENERIC-NEXT: retq # sched: [1:1.00] 6992 ; 6993 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: 6994 ; SKX: # %bb.0: 6995 ; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] 6996 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] 6997 ; SKX-NEXT: retq # sched: [7:1.00] 6998 %vec = load <16 x i16>, <16 x i16>* %vp 6999 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 7000 %cmp = icmp eq <16 x i16> %mask, zeroinitializer 7001 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer 7002 ret <16 x i16> %res 7003 } 7004 7005 define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { 7006 ; GENERIC-LABEL: test_32xi16_perm_high_mask0: 7007 ; GENERIC: # %bb.0: 7008 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 
= zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7009 ; GENERIC-NEXT: retq # sched: [1:1.00] 7010 ; 7011 ; SKX-LABEL: test_32xi16_perm_high_mask0: 7012 ; SKX: # %bb.0: 7013 ; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7014 ; SKX-NEXT: retq # sched: [7:1.00] 7015 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 7016 ret <32 x i16> %res 7017 } 7018 define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7019 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0: 7020 ; GENERIC: # %bb.0: 7021 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7022 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7023 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7024 ; GENERIC-NEXT: retq # sched: [1:1.00] 7025 ; 7026 ; SKX-LABEL: test_masked_32xi16_perm_high_mask0: 7027 ; SKX: # %bb.0: 7028 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7029 ; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7030 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7031 ; SKX-NEXT: retq # sched: [7:1.00] 7032 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 
29, i32 30, i32 28> 7033 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7034 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7035 ret <32 x i16> %res 7036 } 7037 7038 define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { 7039 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0: 7040 ; GENERIC: # %bb.0: 7041 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7042 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7043 ; GENERIC-NEXT: retq # sched: [1:1.00] 7044 ; 7045 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask0: 7046 ; SKX: # %bb.0: 7047 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7048 ; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] 7049 ; SKX-NEXT: retq # sched: [7:1.00] 7050 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28> 7051 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7052 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7053 ret <32 x i16> %res 7054 } 7055 define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7056 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1: 7057 ; GENERIC: # %bb.0: 7058 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7059 ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7060 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7061 ; GENERIC-NEXT: retq # sched: [1:1.00] 
7062 ; 7063 ; SKX-LABEL: test_masked_32xi16_perm_low_mask1: 7064 ; SKX: # %bb.0: 7065 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7066 ; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7067 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7068 ; SKX-NEXT: retq # sched: [7:1.00] 7069 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 7070 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7071 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7072 ret <32 x i16> %res 7073 } 7074 7075 define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { 7076 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1: 7077 ; GENERIC: # %bb.0: 7078 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7079 ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7080 ; GENERIC-NEXT: retq # sched: [1:1.00] 7081 ; 7082 ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask1: 7083 ; SKX: # %bb.0: 7084 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7085 ; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] 7086 ; SKX-NEXT: retq # sched: [7:1.00] 7087 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31> 
7088 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7089 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7090 ret <32 x i16> %res 7091 } 7092 define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7093 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2: 7094 ; GENERIC: # %bb.0: 7095 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7096 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7097 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7098 ; GENERIC-NEXT: retq # sched: [1:1.00] 7099 ; 7100 ; SKX-LABEL: test_masked_32xi16_perm_high_mask2: 7101 ; SKX: # %bb.0: 7102 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7103 ; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7104 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7105 ; SKX-NEXT: retq # sched: [7:1.00] 7106 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 7107 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7108 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7109 ret <32 x i16> %res 7110 } 7111 7112 define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { 7113 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2: 7114 ; GENERIC: # %bb.0: 7115 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7116 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7117 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 7118 ; 7119 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask2: 7120 ; SKX: # %bb.0: 7121 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7122 ; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] 7123 ; SKX-NEXT: retq # sched: [7:1.00] 7124 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31> 7125 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7126 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7127 ret <32 x i16> %res 7128 } 7129 define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { 7130 ; GENERIC-LABEL: test_32xi16_perm_low_mask3: 7131 ; GENERIC: # %bb.0: 7132 ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7133 ; GENERIC-NEXT: retq # sched: [1:1.00] 7134 ; 7135 ; SKX-LABEL: test_32xi16_perm_low_mask3: 7136 ; SKX: # %bb.0: 7137 ; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7138 ; SKX-NEXT: retq # sched: [7:1.00] 7139 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 7140 ret <32 x i16> %res 7141 } 7142 define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7143 ; GENERIC-LABEL: 
test_masked_32xi16_perm_low_mask3: 7144 ; GENERIC: # %bb.0: 7145 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7146 ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7147 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7148 ; GENERIC-NEXT: retq # sched: [1:1.00] 7149 ; 7150 ; SKX-LABEL: test_masked_32xi16_perm_low_mask3: 7151 ; SKX: # %bb.0: 7152 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7153 ; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7154 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7155 ; SKX-NEXT: retq # sched: [7:1.00] 7156 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 7157 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7158 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7159 ret <32 x i16> %res 7160 } 7161 7162 define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { 7163 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3: 7164 ; GENERIC: # %bb.0: 7165 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7166 ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7167 ; GENERIC-NEXT: retq # sched: [1:1.00] 7168 ; 7169 ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask3: 7170 ; SKX: # %bb.0: 7171 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7172 ; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = 
zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] 7173 ; SKX-NEXT: retq # sched: [7:1.00] 7174 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31> 7175 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7176 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7177 ret <32 x i16> %res 7178 } 7179 define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7180 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4: 7181 ; GENERIC: # %bb.0: 7182 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7183 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7184 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7185 ; GENERIC-NEXT: retq # sched: [1:1.00] 7186 ; 7187 ; SKX-LABEL: test_masked_32xi16_perm_high_mask4: 7188 ; SKX: # %bb.0: 7189 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7190 ; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7191 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7192 ; SKX-NEXT: retq # sched: [7:1.00] 7193 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 7194 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7195 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x 
i16> %vec2 7196 ret <32 x i16> %res 7197 } 7198 7199 define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { 7200 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4: 7201 ; GENERIC: # %bb.0: 7202 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7203 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7204 ; GENERIC-NEXT: retq # sched: [1:1.00] 7205 ; 7206 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask4: 7207 ; SKX: # %bb.0: 7208 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7209 ; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] 7210 ; SKX-NEXT: retq # sched: [7:1.00] 7211 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30> 7212 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7213 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7214 ret <32 x i16> %res 7215 } 7216 define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7217 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5: 7218 ; GENERIC: # %bb.0: 7219 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7220 ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7221 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7222 ; GENERIC-NEXT: retq # sched: [1:1.00] 7223 ; 7224 ; SKX-LABEL: test_masked_32xi16_perm_low_mask5: 7225 ; SKX: # %bb.0: 7226 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # 
sched: [3:1.00] 7227 ; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7228 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7229 ; SKX-NEXT: retq # sched: [7:1.00] 7230 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 7231 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7232 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7233 ret <32 x i16> %res 7234 } 7235 7236 define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { 7237 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5: 7238 ; GENERIC: # %bb.0: 7239 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7240 ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7241 ; GENERIC-NEXT: retq # sched: [1:1.00] 7242 ; 7243 ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask5: 7244 ; SKX: # %bb.0: 7245 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7246 ; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] 7247 ; SKX-NEXT: retq # sched: [7:1.00] 7248 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31> 7249 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7250 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> 
zeroinitializer 7251 ret <32 x i16> %res 7252 } 7253 define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { 7254 ; GENERIC-LABEL: test_32xi16_perm_high_mask6: 7255 ; GENERIC: # %bb.0: 7256 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7257 ; GENERIC-NEXT: retq # sched: [1:1.00] 7258 ; 7259 ; SKX-LABEL: test_32xi16_perm_high_mask6: 7260 ; SKX: # %bb.0: 7261 ; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7262 ; SKX-NEXT: retq # sched: [7:1.00] 7263 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 7264 ret <32 x i16> %res 7265 } 7266 define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7267 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6: 7268 ; GENERIC: # %bb.0: 7269 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7270 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7271 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 7272 ; GENERIC-NEXT: retq # sched: [1:1.00] 7273 ; 7274 ; SKX-LABEL: test_masked_32xi16_perm_high_mask6: 7275 ; SKX: # %bb.0: 7276 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] 7277 ; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7278 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 7279 ; SKX-NEXT: retq # sched: [7:1.00] 7280 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, 
<32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 7281 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7282 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 7283 ret <32 x i16> %res 7284 } 7285 7286 define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { 7287 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6: 7288 ; GENERIC: # %bb.0: 7289 ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] 7290 ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7291 ; GENERIC-NEXT: retq # sched: [1:1.00] 7292 ; 7293 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask6: 7294 ; SKX: # %bb.0: 7295 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] 7296 ; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] 7297 ; SKX-NEXT: retq # sched: [7:1.00] 7298 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30> 7299 %cmp = icmp eq <32 x i16> %mask, zeroinitializer 7300 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer 7301 ret <32 x i16> %res 7302 } 7303 define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { 7304 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7: 7305 ; GENERIC: # %bb.0: 7306 ; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] 7307 ; GENERIC-NEXT: 
vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_low_mask7:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked low-word shuffle of a register source: lanes whose %mask element
; is zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_low_mask7:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked high-word shuffle of a <32 x i16> loaded from %vp; the expected
; output is a single vpshufhw with a memory operand.
define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) {
; GENERIC-LABEL: test_32xi16_perm_high_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_high_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
  ret <32 x i16> %res
}

; Merge-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masked low-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked low-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask2:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked low-word shuffle of a <32 x i16> loaded from %vp; the expected
; output is a single vpshuflw with a memory operand.
define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) {
; GENERIC-LABEL: test_32xi16_perm_low_mem_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_low_mem_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i16> %res
}

; Merge-masked low-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked low-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask4:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masked shuffle whose word pattern moves whole dword pairs: the expected
; output is an unmasked vpshufd from memory followed by a masked vmovdqu16
; into %vec2's register.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask5:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked form of the same dword-pair pattern: the expected output is an
; unmasked vpshufd from memory followed by a zero-masked vmovdqu16.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked high-word shuffle of a <32 x i16> loaded from %vp; the expected
; output is a single vpshufhw with a memory operand.
define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) {
; GENERIC-LABEL: test_32xi16_perm_high_mem_mask6:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_high_mem_mask6:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
  ret <32 x i16> %res
}

; Merge-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask6:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked high-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Merge-masked low-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes keep %vec2.
define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask7:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
  ret <32 x i16> %res
}

; Zero-masked low-word shuffle from memory: lanes whose %mask element is
; zero take the shuffled word, all other lanes are zeroed.
define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <32 x i16>, <32 x i16>* %vp
  %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
  %cmp = icmp eq <32 x i16> %mask, zeroinitializer
  %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked <4 x i32> permute of a register source; the expected output is a
; single vpermilps.
define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) {
; GENERIC-LABEL: test_4xi32_perm_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi32_perm_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
  ret <4 x i32> %res
}

; Merge-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_4xi32_perm_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50]
; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00]
; SKX-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes are zeroed.
define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_4xi32_perm_mask1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50]
; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00]
; SKX-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes are zeroed.
define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_4xi32_perm_mask2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50]
; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask2:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00]
; SKX-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes are zeroed.
define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask2:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Unmasked <4 x i32> permute of a register source; the expected output is a
; single vpermilps.
define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) {
; GENERIC-LABEL: test_4xi32_perm_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi32_perm_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  ret <4 x i32> %res
}
; Merge-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_4xi32_perm_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50]
; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00]
; SKX-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masked <4 x i32> permute: lanes whose %mask element is zero take the
; permuted value, all other lanes are zeroed.
define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask3:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Unmasked <4 x i32> permute of a vector loaded from %vp; the expected output
; is a single vpermilps with a memory operand.
define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) {
; GENERIC-LABEL: test_4xi32_perm_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi32_perm_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <4 x i32>, <4 x i32>* %vp
  %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
  ret <4 x i32> %res
}

; Merge-masked <4 x i32> permute from memory: lanes whose %mask element is
; zero take the permuted value, all other lanes keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <4 x i32>, <4 x i32>* %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

; Zero-masked <4 x i32> permute from memory: lanes whose %mask element is
; zero take the permuted value, all other lanes are zeroed.
define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <4 x i32>, <4 x i32>* %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked <4 x i32> permute from memory: lanes whose %mask element is
; zero take the permuted value, all other lanes keep %vec2.
define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mem_mask1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <4 x i32>, <4 x i32>* %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %vec = load <4 x i32>, <4 x i32>* %vp
7948 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1> 7949 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7950 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7951 ret <4 x i32> %res 7952 } 7953 7954 define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 7955 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2: 7956 ; GENERIC: # %bb.0: 7957 ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 7958 ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:0.50] 7959 ; GENERIC-NEXT: retq # sched: [1:1.00] 7960 ; 7961 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask2: 7962 ; SKX: # %bb.0: 7963 ; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 7964 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:1.00] 7965 ; SKX-NEXT: retq # sched: [7:1.00] 7966 %vec = load <4 x i32>, <4 x i32>* %vp 7967 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 7968 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 7969 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 7970 ret <4 x i32> %res 7971 } 7972 7973 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { 7974 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2: 7975 ; GENERIC: # %bb.0: 7976 ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] 7977 ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:0.50] 7978 ; GENERIC-NEXT: retq # sched: [1:1.00] 7979 ; 7980 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2: 7981 ; SKX: # %bb.0: 7982 ; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] 7983 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:1.00] 7984 ; SKX-NEXT: retq # sched: [7:1.00] 7985 %vec = load <4 x i32>, <4 x i32>* %vp 7986 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1> 7987 %cmp = icmp eq 
<4 x i32> %mask, zeroinitializer 7988 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 7989 ret <4 x i32> %res 7990 } 7991 7992 define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { 7993 ; GENERIC-LABEL: test_4xi32_perm_mem_mask3: 7994 ; GENERIC: # %bb.0: 7995 ; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00] 7996 ; GENERIC-NEXT: retq # sched: [1:1.00] 7997 ; 7998 ; SKX-LABEL: test_4xi32_perm_mem_mask3: 7999 ; SKX: # %bb.0: 8000 ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00] 8001 ; SKX-NEXT: retq # sched: [7:1.00] 8002 %vec = load <4 x i32>, <4 x i32>* %vp 8003 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 8004 ret <4 x i32> %res 8005 } 8006 define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { 8007 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3: 8008 ; GENERIC: # %bb.0: 8009 ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 8010 ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:0.50] 8011 ; GENERIC-NEXT: retq # sched: [1:1.00] 8012 ; 8013 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask3: 8014 ; SKX: # %bb.0: 8015 ; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 8016 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:1.00] 8017 ; SKX-NEXT: retq # sched: [7:1.00] 8018 %vec = load <4 x i32>, <4 x i32>* %vp 8019 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 8020 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 8021 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 8022 ret <4 x i32> %res 8023 } 8024 8025 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { 8026 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3: 8027 ; GENERIC: # %bb.0: 8028 ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] 8029 ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = 
mem[1,0,1,0] sched: [7:0.50] 8030 ; GENERIC-NEXT: retq # sched: [1:1.00] 8031 ; 8032 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3: 8033 ; SKX: # %bb.0: 8034 ; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] 8035 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:1.00] 8036 ; SKX-NEXT: retq # sched: [7:1.00] 8037 %vec = load <4 x i32>, <4 x i32>* %vp 8038 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0> 8039 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 8040 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer 8041 ret <4 x i32> %res 8042 } 8043 8044 define <8 x i32> @test2_8xi32_perm_mask0(<8 x i32> %vec) { 8045 ; GENERIC-LABEL: test2_8xi32_perm_mask0: 8046 ; GENERIC: # %bb.0: 8047 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8048 ; GENERIC-NEXT: retq # sched: [1:1.00] 8049 ; 8050 ; SKX-LABEL: test2_8xi32_perm_mask0: 8051 ; SKX: # %bb.0: 8052 ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8053 ; SKX-NEXT: retq # sched: [7:1.00] 8054 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 8055 ret <8 x i32> %res 8056 } 8057 define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8058 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask0: 8059 ; GENERIC: # %bb.0: 8060 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8061 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8062 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8063 ; GENERIC-NEXT: retq # sched: [1:1.00] 8064 ; 8065 ; SKX-LABEL: test2_masked_8xi32_perm_mask0: 8066 ; SKX: # %bb.0: 8067 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8068 ; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8069 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8070 ; SKX-NEXT: 
retq # sched: [7:1.00] 8071 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 8072 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8073 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8074 ret <8 x i32> %res 8075 } 8076 8077 define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { 8078 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0: 8079 ; GENERIC: # %bb.0: 8080 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8081 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8082 ; GENERIC-NEXT: retq # sched: [1:1.00] 8083 ; 8084 ; SKX-LABEL: test2_masked_z_8xi32_perm_mask0: 8085 ; SKX: # %bb.0: 8086 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8087 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] 8088 ; SKX-NEXT: retq # sched: [7:1.00] 8089 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> 8090 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8091 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8092 ret <8 x i32> %res 8093 } 8094 define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8095 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask1: 8096 ; GENERIC: # %bb.0: 8097 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8098 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8099 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8100 ; GENERIC-NEXT: retq # sched: [1:1.00] 8101 ; 8102 ; SKX-LABEL: test2_masked_8xi32_perm_mask1: 8103 ; SKX: # %bb.0: 8104 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8105 ; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8106 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8107 ; SKX-NEXT: retq # sched: 
[7:1.00] 8108 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 8109 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8110 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8111 ret <8 x i32> %res 8112 } 8113 8114 define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { 8115 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1: 8116 ; GENERIC: # %bb.0: 8117 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8118 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8119 ; GENERIC-NEXT: retq # sched: [1:1.00] 8120 ; 8121 ; SKX-LABEL: test2_masked_z_8xi32_perm_mask1: 8122 ; SKX: # %bb.0: 8123 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8124 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] 8125 ; SKX-NEXT: retq # sched: [7:1.00] 8126 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7> 8127 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8128 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8129 ret <8 x i32> %res 8130 } 8131 define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8132 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask2: 8133 ; GENERIC: # %bb.0: 8134 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8135 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8136 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8137 ; GENERIC-NEXT: retq # sched: [1:1.00] 8138 ; 8139 ; SKX-LABEL: test2_masked_8xi32_perm_mask2: 8140 ; SKX: # %bb.0: 8141 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8142 ; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8143 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8144 ; SKX-NEXT: retq # sched: [7:1.00] 8145 %shuf 
= shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7> 8146 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8147 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8148 ret <8 x i32> %res 8149 } 8150 8151 define <8 x i32> @test2_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { 8152 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2: 8153 ; GENERIC: # %bb.0: 8154 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8155 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8156 ; GENERIC-NEXT: retq # sched: [1:1.00] 8157 ; 8158 ; SKX-LABEL: test2_masked_z_8xi32_perm_mask2: 8159 ; SKX: # %bb.0: 8160 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8161 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] 8162 ; SKX-NEXT: retq # sched: [7:1.00] 8163 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7> 8164 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8165 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8166 ret <8 x i32> %res 8167 } 8168 define <8 x i32> @test2_8xi32_perm_mask3(<8 x i32> %vec) { 8169 ; GENERIC-LABEL: test2_8xi32_perm_mask3: 8170 ; GENERIC: # %bb.0: 8171 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8172 ; GENERIC-NEXT: retq # sched: [1:1.00] 8173 ; 8174 ; SKX-LABEL: test2_8xi32_perm_mask3: 8175 ; SKX: # %bb.0: 8176 ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8177 ; SKX-NEXT: retq # sched: [7:1.00] 8178 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 8179 ret <8 x i32> %res 8180 } 8181 define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { 8182 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask3: 8183 ; GENERIC: # %bb.0: 
8184 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8185 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8186 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 8187 ; GENERIC-NEXT: retq # sched: [1:1.00] 8188 ; 8189 ; SKX-LABEL: test2_masked_8xi32_perm_mask3: 8190 ; SKX: # %bb.0: 8191 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8192 ; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8193 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 8194 ; SKX-NEXT: retq # sched: [7:1.00] 8195 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 8196 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8197 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8198 ret <8 x i32> %res 8199 } 8200 8201 define <8 x i32> @test2_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { 8202 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3: 8203 ; GENERIC: # %bb.0: 8204 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8205 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8206 ; GENERIC-NEXT: retq # sched: [1:1.00] 8207 ; 8208 ; SKX-LABEL: test2_masked_z_8xi32_perm_mask3: 8209 ; SKX: # %bb.0: 8210 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8211 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] 8212 ; SKX-NEXT: retq # sched: [7:1.00] 8213 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4> 8214 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8215 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8216 ret <8 x i32> %res 8217 } 8218 define <8 x i32> @test2_8xi32_perm_mem_mask0(<8 x i32>* %vp) { 8219 ; GENERIC-LABEL: test2_8xi32_perm_mem_mask0: 8220 ; GENERIC: # %bb.0: 8221 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = 
mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8222 ; GENERIC-NEXT: retq # sched: [1:1.00] 8223 ; 8224 ; SKX-LABEL: test2_8xi32_perm_mem_mask0: 8225 ; SKX: # %bb.0: 8226 ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8227 ; SKX-NEXT: retq # sched: [7:1.00] 8228 %vec = load <8 x i32>, <8 x i32>* %vp 8229 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 8230 ret <8 x i32> %res 8231 } 8232 define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8233 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0: 8234 ; GENERIC: # %bb.0: 8235 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8236 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8237 ; GENERIC-NEXT: retq # sched: [1:1.00] 8238 ; 8239 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask0: 8240 ; SKX: # %bb.0: 8241 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8242 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8243 ; SKX-NEXT: retq # sched: [7:1.00] 8244 %vec = load <8 x i32>, <8 x i32>* %vp 8245 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 8246 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8247 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8248 ret <8 x i32> %res 8249 } 8250 8251 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { 8252 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0: 8253 ; GENERIC: # %bb.0: 8254 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8255 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8256 ; GENERIC-NEXT: retq # sched: [1:1.00] 8257 ; 8258 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask0: 8259 ; SKX: # %bb.0: 8260 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8261 ; 
SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] 8262 ; SKX-NEXT: retq # sched: [7:1.00] 8263 %vec = load <8 x i32>, <8 x i32>* %vp 8264 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4> 8265 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8266 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8267 ret <8 x i32> %res 8268 } 8269 8270 define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8271 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1: 8272 ; GENERIC: # %bb.0: 8273 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8274 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8275 ; GENERIC-NEXT: retq # sched: [1:1.00] 8276 ; 8277 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask1: 8278 ; SKX: # %bb.0: 8279 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8280 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8281 ; SKX-NEXT: retq # sched: [7:1.00] 8282 %vec = load <8 x i32>, <8 x i32>* %vp 8283 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 8284 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8285 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8286 ret <8 x i32> %res 8287 } 8288 8289 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { 8290 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1: 8291 ; GENERIC: # %bb.0: 8292 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8293 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8294 ; GENERIC-NEXT: retq # sched: [1:1.00] 8295 ; 8296 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask1: 8297 ; SKX: # %bb.0: 8298 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8299 ; SKX-NEXT: vpshufd 
{{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] 8300 ; SKX-NEXT: retq # sched: [7:1.00] 8301 %vec = load <8 x i32>, <8 x i32>* %vp 8302 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4> 8303 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8304 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8305 ret <8 x i32> %res 8306 } 8307 8308 define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8309 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2: 8310 ; GENERIC: # %bb.0: 8311 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8312 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8313 ; GENERIC-NEXT: retq # sched: [1:1.00] 8314 ; 8315 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask2: 8316 ; SKX: # %bb.0: 8317 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8318 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8319 ; SKX-NEXT: retq # sched: [7:1.00] 8320 %vec = load <8 x i32>, <8 x i32>* %vp 8321 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5> 8322 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8323 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 8324 ret <8 x i32> %res 8325 } 8326 8327 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { 8328 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2: 8329 ; GENERIC: # %bb.0: 8330 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8331 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8332 ; GENERIC-NEXT: retq # sched: [1:1.00] 8333 ; 8334 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask2: 8335 ; SKX: # %bb.0: 8336 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8337 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} 
= mem[3,2,3,1,7,6,7,5] sched: [8:1.00] 8338 ; SKX-NEXT: retq # sched: [7:1.00] 8339 %vec = load <8 x i32>, <8 x i32>* %vp 8340 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5> 8341 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8342 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8343 ret <8 x i32> %res 8344 } 8345 8346 define <8 x i32> @test2_8xi32_perm_mem_mask3(<8 x i32>* %vp) { 8347 ; GENERIC-LABEL: test2_8xi32_perm_mem_mask3: 8348 ; GENERIC: # %bb.0: 8349 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8350 ; GENERIC-NEXT: retq # sched: [1:1.00] 8351 ; 8352 ; SKX-LABEL: test2_8xi32_perm_mem_mask3: 8353 ; SKX: # %bb.0: 8354 ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8355 ; SKX-NEXT: retq # sched: [7:1.00] 8356 %vec = load <8 x i32>, <8 x i32>* %vp 8357 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 8358 ret <8 x i32> %res 8359 } 8360 define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { 8361 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3: 8362 ; GENERIC: # %bb.0: 8363 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8364 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8365 ; GENERIC-NEXT: retq # sched: [1:1.00] 8366 ; 8367 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask3: 8368 ; SKX: # %bb.0: 8369 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8370 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8371 ; SKX-NEXT: retq # sched: [7:1.00] 8372 %vec = load <8 x i32>, <8 x i32>* %vp 8373 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 8374 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8375 %res = select <8 x i1> %cmp, <8 x 
i32> %shuf, <8 x i32> %vec2 8376 ret <8 x i32> %res 8377 } 8378 8379 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { 8380 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3: 8381 ; GENERIC: # %bb.0: 8382 ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8383 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8384 ; GENERIC-NEXT: retq # sched: [1:1.00] 8385 ; 8386 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask3: 8387 ; SKX: # %bb.0: 8388 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8389 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] 8390 ; SKX-NEXT: retq # sched: [7:1.00] 8391 %vec = load <8 x i32>, <8 x i32>* %vp 8392 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4> 8393 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8394 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 8395 ret <8 x i32> %res 8396 } 8397 8398 define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) { 8399 ; GENERIC-LABEL: test2_16xi32_perm_mask0: 8400 ; GENERIC: # %bb.0: 8401 ; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8402 ; GENERIC-NEXT: retq # sched: [1:1.00] 8403 ; 8404 ; SKX-LABEL: test2_16xi32_perm_mask0: 8405 ; SKX: # %bb.0: 8406 ; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8407 ; SKX-NEXT: retq # sched: [7:1.00] 8408 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 8409 ret <16 x i32> %res 8410 } 8411 define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8412 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask0: 8413 ; GENERIC: # %bb.0: 8414 ; GENERIC-NEXT: 
vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8415 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8416 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8417 ; GENERIC-NEXT: retq # sched: [1:1.00] 8418 ; 8419 ; SKX-LABEL: test2_masked_16xi32_perm_mask0: 8420 ; SKX: # %bb.0: 8421 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8422 ; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8423 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8424 ; SKX-NEXT: retq # sched: [7:1.00] 8425 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 8426 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8427 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8428 ret <16 x i32> %res 8429 } 8430 8431 define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { 8432 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0: 8433 ; GENERIC: # %bb.0: 8434 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8435 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8436 ; GENERIC-NEXT: retq # sched: [1:1.00] 8437 ; 8438 ; SKX-LABEL: test2_masked_z_16xi32_perm_mask0: 8439 ; SKX: # %bb.0: 8440 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8441 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] 8442 ; SKX-NEXT: retq # sched: [7:1.00] 8443 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12> 8444 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8445 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8446 ret <16 x 
i32> %res 8447 } 8448 define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8449 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask1: 8450 ; GENERIC: # %bb.0: 8451 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8452 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8453 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8454 ; GENERIC-NEXT: retq # sched: [1:1.00] 8455 ; 8456 ; SKX-LABEL: test2_masked_16xi32_perm_mask1: 8457 ; SKX: # %bb.0: 8458 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8459 ; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8460 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8461 ; SKX-NEXT: retq # sched: [7:1.00] 8462 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12> 8463 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8464 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8465 ret <16 x i32> %res 8466 } 8467 8468 define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { 8469 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1: 8470 ; GENERIC: # %bb.0: 8471 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8472 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8473 ; GENERIC-NEXT: retq # sched: [1:1.00] 8474 ; 8475 ; SKX-LABEL: test2_masked_z_16xi32_perm_mask1: 8476 ; SKX: # %bb.0: 8477 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8478 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] 8479 ; SKX-NEXT: retq # sched: [7:1.00] 8480 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, 
i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12> 8481 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8482 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8483 ret <16 x i32> %res 8484 } 8485 define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8486 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask2: 8487 ; GENERIC: # %bb.0: 8488 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8489 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8490 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8491 ; GENERIC-NEXT: retq # sched: [1:1.00] 8492 ; 8493 ; SKX-LABEL: test2_masked_16xi32_perm_mask2: 8494 ; SKX: # %bb.0: 8495 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8496 ; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8497 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8498 ; SKX-NEXT: retq # sched: [7:1.00] 8499 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12> 8500 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8501 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8502 ret <16 x i32> %res 8503 } 8504 8505 define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { 8506 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2: 8507 ; GENERIC: # %bb.0: 8508 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8509 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8510 ; GENERIC-NEXT: retq # sched: [1:1.00] 8511 ; 8512 ; SKX-LABEL: test2_masked_z_16xi32_perm_mask2: 8513 ; SKX: # %bb.0: 8514 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8515 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 
{%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] 8516 ; SKX-NEXT: retq # sched: [7:1.00] 8517 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12> 8518 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8519 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8520 ret <16 x i32> %res 8521 } 8522 define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) { 8523 ; GENERIC-LABEL: test2_16xi32_perm_mask3: 8524 ; GENERIC: # %bb.0: 8525 ; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8526 ; GENERIC-NEXT: retq # sched: [1:1.00] 8527 ; 8528 ; SKX-LABEL: test2_16xi32_perm_mask3: 8529 ; SKX: # %bb.0: 8530 ; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8531 ; SKX-NEXT: retq # sched: [7:1.00] 8532 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 8533 ret <16 x i32> %res 8534 } 8535 define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { 8536 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask3: 8537 ; GENERIC: # %bb.0: 8538 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8539 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8540 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 8541 ; GENERIC-NEXT: retq # sched: [1:1.00] 8542 ; 8543 ; SKX-LABEL: test2_masked_16xi32_perm_mask3: 8544 ; SKX: # %bb.0: 8545 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8546 ; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8547 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 8548 ; SKX-NEXT: 
retq # sched: [7:1.00] 8549 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 8550 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8551 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8552 ret <16 x i32> %res 8553 } 8554 8555 define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { 8556 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3: 8557 ; GENERIC: # %bb.0: 8558 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8559 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8560 ; GENERIC-NEXT: retq # sched: [1:1.00] 8561 ; 8562 ; SKX-LABEL: test2_masked_z_16xi32_perm_mask3: 8563 ; SKX: # %bb.0: 8564 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8565 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] 8566 ; SKX-NEXT: retq # sched: [7:1.00] 8567 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15> 8568 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8569 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8570 ret <16 x i32> %res 8571 } 8572 define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { 8573 ; GENERIC-LABEL: test2_16xi32_perm_mem_mask0: 8574 ; GENERIC: # %bb.0: 8575 ; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8576 ; GENERIC-NEXT: retq # sched: [1:1.00] 8577 ; 8578 ; SKX-LABEL: test2_16xi32_perm_mem_mask0: 8579 ; SKX: # %bb.0: 8580 ; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8581 ; SKX-NEXT: retq # sched: [7:1.00] 8582 %vec = load <16 x i32>, <16 x i32>* %vp 8583 %res 
= shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15> 8584 ret <16 x i32> %res 8585 } 8586 define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8587 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0: 8588 ; GENERIC: # %bb.0: 8589 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8590 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8591 ; GENERIC-NEXT: retq # sched: [1:1.00] 8592 ; 8593 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask0: 8594 ; SKX: # %bb.0: 8595 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8596 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8597 ; SKX-NEXT: retq # sched: [7:1.00] 8598 %vec = load <16 x i32>, <16 x i32>* %vp 8599 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15> 8600 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8601 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8602 ret <16 x i32> %res 8603 } 8604 8605 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { 8606 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0: 8607 ; GENERIC: # %bb.0: 8608 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8609 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8610 ; GENERIC-NEXT: retq # sched: [1:1.00] 8611 ; 8612 ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask0: 8613 ; SKX: # %bb.0: 8614 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8615 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] 8616 ; SKX-NEXT: 
retq # sched: [7:1.00] 8617 %vec = load <16 x i32>, <16 x i32>* %vp 8618 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15> 8619 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8620 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8621 ret <16 x i32> %res 8622 } 8623 8624 define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8625 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1: 8626 ; GENERIC: # %bb.0: 8627 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8628 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8629 ; GENERIC-NEXT: retq # sched: [1:1.00] 8630 ; 8631 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask1: 8632 ; SKX: # %bb.0: 8633 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8634 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8635 ; SKX-NEXT: retq # sched: [7:1.00] 8636 %vec = load <16 x i32>, <16 x i32>* %vp 8637 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14> 8638 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8639 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8640 ret <16 x i32> %res 8641 } 8642 8643 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { 8644 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1: 8645 ; GENERIC: # %bb.0: 8646 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8647 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8648 ; GENERIC-NEXT: retq # sched: [1:1.00] 8649 ; 8650 ; SKX-LABEL: 
test2_masked_z_16xi32_perm_mem_mask1: 8651 ; SKX: # %bb.0: 8652 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8653 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] 8654 ; SKX-NEXT: retq # sched: [7:1.00] 8655 %vec = load <16 x i32>, <16 x i32>* %vp 8656 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14> 8657 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8658 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8659 ret <16 x i32> %res 8660 } 8661 8662 define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8663 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2: 8664 ; GENERIC: # %bb.0: 8665 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8666 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 8667 ; GENERIC-NEXT: retq # sched: [1:1.00] 8668 ; 8669 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask2: 8670 ; SKX: # %bb.0: 8671 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8672 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 8673 ; SKX-NEXT: retq # sched: [7:1.00] 8674 %vec = load <16 x i32>, <16 x i32>* %vp 8675 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14> 8676 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8677 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8678 ret <16 x i32> %res 8679 } 8680 8681 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { 8682 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2: 8683 ; GENERIC: # %bb.0: 8684 ; GENERIC-NEXT: vptestnmd 
%zmm0, %zmm0, %k1 # sched: [1:0.33] 8685 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 8686 ; GENERIC-NEXT: retq # sched: [1:1.00] 8687 ; 8688 ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask2: 8689 ; SKX: # %bb.0: 8690 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8691 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] 8692 ; SKX-NEXT: retq # sched: [7:1.00] 8693 %vec = load <16 x i32>, <16 x i32>* %vp 8694 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14> 8695 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8696 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8697 ret <16 x i32> %res 8698 } 8699 8700 define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { 8701 ; GENERIC-LABEL: test2_16xi32_perm_mem_mask3: 8702 ; GENERIC: # %bb.0: 8703 ; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8704 ; GENERIC-NEXT: retq # sched: [1:1.00] 8705 ; 8706 ; SKX-LABEL: test2_16xi32_perm_mem_mask3: 8707 ; SKX: # %bb.0: 8708 ; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8709 ; SKX-NEXT: retq # sched: [7:1.00] 8710 %vec = load <16 x i32>, <16 x i32>* %vp 8711 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13> 8712 ret <16 x i32> %res 8713 } 8714 define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { 8715 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3: 8716 ; GENERIC: # %bb.0: 8717 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8718 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = 
mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8719 ; GENERIC-NEXT: retq # sched: [1:1.00] 8720 ; 8721 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask3: 8722 ; SKX: # %bb.0: 8723 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8724 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8725 ; SKX-NEXT: retq # sched: [7:1.00] 8726 %vec = load <16 x i32>, <16 x i32>* %vp 8727 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13> 8728 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8729 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 8730 ret <16 x i32> %res 8731 } 8732 8733 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { 8734 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3: 8735 ; GENERIC: # %bb.0: 8736 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8737 ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8738 ; GENERIC-NEXT: retq # sched: [1:1.00] 8739 ; 8740 ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask3: 8741 ; SKX: # %bb.0: 8742 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8743 ; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] 8744 ; SKX-NEXT: retq # sched: [7:1.00] 8745 %vec = load <16 x i32>, <16 x i32>* %vp 8746 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13> 8747 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 8748 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 8749 ret <16 x i32> %res 8750 } 8751 8752 define <8 x float> @test2_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) { 8753 ; 
GENERIC-LABEL: test2_8xfloat_shuff_mask0: 8754 ; GENERIC: # %bb.0: 8755 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 8756 ; GENERIC-NEXT: retq # sched: [1:1.00] 8757 ; 8758 ; SKX-LABEL: test2_8xfloat_shuff_mask0: 8759 ; SKX: # %bb.0: 8760 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 8761 ; SKX-NEXT: retq # sched: [7:1.00] 8762 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8763 ret <8 x float> %res 8764 } 8765 define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8766 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0: 8767 ; GENERIC: # %bb.0: 8768 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8769 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8770 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 8771 ; GENERIC-NEXT: retq # sched: [1:1.00] 8772 ; 8773 ; SKX-LABEL: test2_8xfloat_masked_shuff_mask0: 8774 ; SKX: # %bb.0: 8775 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8776 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8777 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8778 ; SKX-NEXT: retq # sched: [7:1.00] 8779 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8780 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8781 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8782 ret <8 x float> %res 8783 } 8784 8785 define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 8786 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0: 8787 ; GENERIC: # %bb.0: 8788 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8789 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = 
ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8790 ; GENERIC-NEXT: retq # sched: [1:1.00] 8791 ; 8792 ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask0: 8793 ; SKX: # %bb.0: 8794 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8795 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8796 ; SKX-NEXT: retq # sched: [7:1.00] 8797 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8798 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8799 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8800 ret <8 x float> %res 8801 } 8802 define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8803 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1: 8804 ; GENERIC: # %bb.0: 8805 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8806 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8807 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 8808 ; GENERIC-NEXT: retq # sched: [1:1.00] 8809 ; 8810 ; SKX-LABEL: test2_8xfloat_masked_shuff_mask1: 8811 ; SKX: # %bb.0: 8812 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8813 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8814 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8815 ; SKX-NEXT: retq # sched: [7:1.00] 8816 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8817 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8818 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8819 ret <8 x float> %res 8820 } 8821 8822 define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 8823 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1: 8824 ; GENERIC: # %bb.0: 8825 
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8826 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8827 ; GENERIC-NEXT: retq # sched: [1:1.00] 8828 ; 8829 ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask1: 8830 ; SKX: # %bb.0: 8831 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8832 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8833 ; SKX-NEXT: retq # sched: [7:1.00] 8834 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8835 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8836 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8837 ret <8 x float> %res 8838 } 8839 define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8840 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2: 8841 ; GENERIC: # %bb.0: 8842 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8843 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 8844 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 8845 ; GENERIC-NEXT: retq # sched: [1:1.00] 8846 ; 8847 ; SKX-LABEL: test2_8xfloat_masked_shuff_mask2: 8848 ; SKX: # %bb.0: 8849 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8850 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 8851 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8852 ; SKX-NEXT: retq # sched: [7:1.00] 8853 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8854 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8855 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8856 ret <8 x float> %res 8857 } 8858 8859 define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x 
float> %vec2, <8 x i32> %mask) { 8860 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2: 8861 ; GENERIC: # %bb.0: 8862 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8863 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 8864 ; GENERIC-NEXT: retq # sched: [1:1.00] 8865 ; 8866 ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask2: 8867 ; SKX: # %bb.0: 8868 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8869 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 8870 ; SKX-NEXT: retq # sched: [7:1.00] 8871 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8872 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8873 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8874 ret <8 x float> %res 8875 } 8876 define <8 x float> @test2_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) { 8877 ; GENERIC-LABEL: test2_8xfloat_shuff_mask3: 8878 ; GENERIC: # %bb.0: 8879 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 8880 ; GENERIC-NEXT: retq # sched: [1:1.00] 8881 ; 8882 ; SKX-LABEL: test2_8xfloat_shuff_mask3: 8883 ; SKX: # %bb.0: 8884 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 8885 ; SKX-NEXT: retq # sched: [7:1.00] 8886 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8887 ret <8 x float> %res 8888 } 8889 define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 8890 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3: 8891 ; GENERIC: # %bb.0: 8892 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 8893 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8894 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # 
sched: [1:1.00] 8895 ; GENERIC-NEXT: retq # sched: [1:1.00] 8896 ; 8897 ; SKX-LABEL: test2_8xfloat_masked_shuff_mask3: 8898 ; SKX: # %bb.0: 8899 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 8900 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8901 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 8902 ; SKX-NEXT: retq # sched: [7:1.00] 8903 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8904 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8905 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8906 ret <8 x float> %res 8907 } 8908 8909 define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 8910 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3: 8911 ; GENERIC: # %bb.0: 8912 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8913 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 8914 ; GENERIC-NEXT: retq # sched: [1:1.00] 8915 ; 8916 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mask3: 8917 ; SKX: # %bb.0: 8918 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8919 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 8920 ; SKX-NEXT: retq # sched: [7:1.00] 8921 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 8922 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8923 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8924 ret <8 x float> %res 8925 } 8926 define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { 8927 ; GENERIC-LABEL: test_8xfloat_shuff_mem_mask0: 8928 ; GENERIC: # %bb.0: 8929 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 8930 ; GENERIC-NEXT: retq # sched: [1:1.00] 8931 ; 
8932 ; SKX-LABEL: test_8xfloat_shuff_mem_mask0: 8933 ; SKX: # %bb.0: 8934 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 8935 ; SKX-NEXT: retq # sched: [7:1.00] 8936 %vec2 = load <8 x float>, <8 x float>* %vec2p 8937 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8938 ret <8 x float> %res 8939 } 8940 define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 8941 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0: 8942 ; GENERIC: # %bb.0: 8943 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8944 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 8945 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 8946 ; GENERIC-NEXT: retq # sched: [1:1.00] 8947 ; 8948 ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask0: 8949 ; SKX: # %bb.0: 8950 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8951 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 8952 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 8953 ; SKX-NEXT: retq # sched: [7:1.00] 8954 %vec2 = load <8 x float>, <8 x float>* %vec2p 8955 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8956 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8957 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8958 ret <8 x float> %res 8959 } 8960 8961 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 8962 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: 8963 ; GENERIC: # %bb.0: 8964 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8965 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 8966 ; GENERIC-NEXT: retq # 
sched: [1:1.00] 8967 ; 8968 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: 8969 ; SKX: # %bb.0: 8970 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8971 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 8972 ; SKX-NEXT: retq # sched: [7:1.00] 8973 %vec2 = load <8 x float>, <8 x float>* %vec2p 8974 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8975 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8976 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 8977 ret <8 x float> %res 8978 } 8979 8980 define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 8981 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1: 8982 ; GENERIC: # %bb.0: 8983 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8984 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 8985 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 8986 ; GENERIC-NEXT: retq # sched: [1:1.00] 8987 ; 8988 ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask1: 8989 ; SKX: # %bb.0: 8990 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8991 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 8992 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 8993 ; SKX-NEXT: retq # sched: [7:1.00] 8994 %vec2 = load <8 x float>, <8 x float>* %vec2p 8995 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 8996 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 8997 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 8998 ret <8 x float> %res 8999 } 9000 9001 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 9002 ; GENERIC-LABEL: 
test_8xfloat_zero_masked_shuff_mem_mask1: 9003 ; GENERIC: # %bb.0: 9004 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 9005 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 9006 ; GENERIC-NEXT: retq # sched: [1:1.00] 9007 ; 9008 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: 9009 ; SKX: # %bb.0: 9010 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 9011 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 9012 ; SKX-NEXT: retq # sched: [7:1.00] 9013 %vec2 = load <8 x float>, <8 x float>* %vec2p 9014 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 9015 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9016 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 9017 ret <8 x float> %res 9018 } 9019 9020 define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 9021 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2: 9022 ; GENERIC: # %bb.0: 9023 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 9024 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9025 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 9026 ; GENERIC-NEXT: retq # sched: [1:1.00] 9027 ; 9028 ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask2: 9029 ; SKX: # %bb.0: 9030 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 9031 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9032 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 9033 ; SKX-NEXT: retq # sched: [7:1.00] 9034 %vec2 = load <8 x float>, <8 x float>* %vec2p 9035 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9036 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9037 
%res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 9038 ret <8 x float> %res 9039 } 9040 9041 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 9042 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: 9043 ; GENERIC: # %bb.0: 9044 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 9045 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9046 ; GENERIC-NEXT: retq # sched: [1:1.00] 9047 ; 9048 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: 9049 ; SKX: # %bb.0: 9050 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 9051 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9052 ; SKX-NEXT: retq # sched: [7:1.00] 9053 %vec2 = load <8 x float>, <8 x float>* %vec2p 9054 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9055 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9056 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 9057 ret <8 x float> %res 9058 } 9059 9060 define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { 9061 ; GENERIC-LABEL: test_8xfloat_shuff_mem_mask3: 9062 ; GENERIC: # %bb.0: 9063 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] 9064 ; GENERIC-NEXT: retq # sched: [1:1.00] 9065 ; 9066 ; SKX-LABEL: test_8xfloat_shuff_mem_mask3: 9067 ; SKX: # %bb.0: 9068 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 9069 ; SKX-NEXT: retq # sched: [7:1.00] 9070 %vec2 = load <8 x float>, <8 x float>* %vec2p 9071 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9072 ret <8 x float> %res 9073 } 9074 define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x 
float> %vec3, <8 x i32> %mask) { 9075 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3: 9076 ; GENERIC: # %bb.0: 9077 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 9078 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9079 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 9080 ; GENERIC-NEXT: retq # sched: [1:1.00] 9081 ; 9082 ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask3: 9083 ; SKX: # %bb.0: 9084 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 9085 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9086 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 9087 ; SKX-NEXT: retq # sched: [7:1.00] 9088 %vec2 = load <8 x float>, <8 x float>* %vec2p 9089 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9090 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 9091 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 9092 ret <8 x float> %res 9093 } 9094 9095 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 9096 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: 9097 ; GENERIC: # %bb.0: 9098 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 9099 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 9100 ; GENERIC-NEXT: retq # sched: [1:1.00] 9101 ; 9102 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: 9103 ; SKX: # %bb.0: 9104 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 9105 ; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 9106 ; SKX-NEXT: retq # sched: [7:1.00] 9107 %vec2 = load <8 x float>, <8 x float>* %vec2p 9108 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 9109 %cmp = icmp eq <8 x i32> 
%mask, zeroinitializer 9110 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 9111 ret <8 x float> %res 9112 } 9113
; test_16xfloat_shuff_mask0: unmasked two-source shufflevector on <16 x float>; the CHECK lines below expect a single vshuff32x4 for both the GENERIC and SKX prefixes.
; NOTE(review): %mask is declared in the signature but never referenced in the body (only shufflevector + ret). The unmasked 8 x float variants in this file take no mask argument. Left unchanged here; if the parameter is dropped, regenerate the assertions with utils/update_llc_test_checks.py rather than editing them by hand.
9114 define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9115 ; GENERIC-LABEL: test_16xfloat_shuff_mask0: 9116 ; GENERIC: # %bb.0: 9117 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 9118 ; GENERIC-NEXT: retq # sched: [1:1.00] 9119 ; 9120 ; SKX-LABEL: test_16xfloat_shuff_mask0: 9121 ; SKX: # %bb.0: 9122 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 9123 ; SKX-NEXT: retq # sched: [7:1.00] 9124 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 9125 ret <16 x float> %res 9126 } 9127 define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 9128 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0: 9129 ; GENERIC: # %bb.0: 9130 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9131 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 9132 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 9133 ; GENERIC-NEXT: retq # sched: [1:1.00] 9134 ; 9135 ; SKX-LABEL: test_16xfloat_masked_shuff_mask0: 9136 ; SKX: # %bb.0: 9137 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9138 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 9139 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9140 ; SKX-NEXT: retq # sched: [7:1.00] 9141 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, 
i32 29, i32 30, i32 31> 9142 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9143 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9144 ret <16 x float> %res 9145 } 9146 9147 define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9148 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0: 9149 ; GENERIC: # %bb.0: 9150 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9151 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 9152 ; GENERIC-NEXT: retq # sched: [1:1.00] 9153 ; 9154 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask0: 9155 ; SKX: # %bb.0: 9156 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9157 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 9158 ; SKX-NEXT: retq # sched: [7:1.00] 9159 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 9160 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9161 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9162 ret <16 x float> %res 9163 } 9164 define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 9165 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1: 9166 ; GENERIC: # %bb.0: 9167 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9168 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] 9169 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 9170 ; GENERIC-NEXT: retq # sched: [1:1.00] 9171 ; 9172 ; SKX-LABEL: test_16xfloat_masked_shuff_mask1: 9173 ; SKX: # %bb.0: 9174 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9175 ; SKX-NEXT: 
vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] 9176 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9177 ; SKX-NEXT: retq # sched: [7:1.00] 9178 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31> 9179 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9180 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9181 ret <16 x float> %res 9182 } 9183 9184 define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9185 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1: 9186 ; GENERIC: # %bb.0: 9187 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9188 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] 9189 ; GENERIC-NEXT: retq # sched: [1:1.00] 9190 ; 9191 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask1: 9192 ; SKX: # %bb.0: 9193 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9194 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] 9195 ; SKX-NEXT: retq # sched: [7:1.00] 9196 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31> 9197 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9198 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9199 ret <16 x float> %res 9200 } 9201 define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 9202 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2: 9203 ; GENERIC: # %bb.0: 9204 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9205 ; GENERIC-NEXT: 
vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] 9206 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 9207 ; GENERIC-NEXT: retq # sched: [1:1.00] 9208 ; 9209 ; SKX-LABEL: test_16xfloat_masked_shuff_mask2: 9210 ; SKX: # %bb.0: 9211 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9212 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] 9213 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9214 ; SKX-NEXT: retq # sched: [7:1.00] 9215 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 9216 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9217 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9218 ret <16 x float> %res 9219 } 9220 9221 define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9222 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2: 9223 ; GENERIC: # %bb.0: 9224 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9225 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] 9226 ; GENERIC-NEXT: retq # sched: [1:1.00] 9227 ; 9228 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask2: 9229 ; SKX: # %bb.0: 9230 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9231 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] 9232 ; SKX-NEXT: retq # sched: [7:1.00] 9233 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 9234 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9235 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> 
zeroinitializer 9236 ret <16 x float> %res 9237 } 9238 define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) { 9239 ; GENERIC-LABEL: test_16xfloat_shuff_mask3: 9240 ; GENERIC: # %bb.0: 9241 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] 9242 ; GENERIC-NEXT: retq # sched: [1:1.00] 9243 ; 9244 ; SKX-LABEL: test_16xfloat_shuff_mask3: 9245 ; SKX: # %bb.0: 9246 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] 9247 ; SKX-NEXT: retq # sched: [7:1.00] 9248 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 9249 ret <16 x float> %res 9250 } 9251 define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 9252 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3: 9253 ; GENERIC: # %bb.0: 9254 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 9255 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] 9256 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 9257 ; GENERIC-NEXT: retq # sched: [1:1.00] 9258 ; 9259 ; SKX-LABEL: test_16xfloat_masked_shuff_mask3: 9260 ; SKX: # %bb.0: 9261 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 9262 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] 9263 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 9264 ; SKX-NEXT: retq # sched: [7:1.00] 9265 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 9266 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9267 %res = select <16 x i1> 
%cmp, <16 x float> %shuf, <16 x float> %vec3 9268 ret <16 x float> %res 9269 } 9270 9271 define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 9272 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3: 9273 ; GENERIC: # %bb.0: 9274 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9275 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] 9276 ; GENERIC-NEXT: retq # sched: [1:1.00] 9277 ; 9278 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask3: 9279 ; SKX: # %bb.0: 9280 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9281 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] 9282 ; SKX-NEXT: retq # sched: [7:1.00] 9283 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 9284 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9285 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9286 ret <16 x float> %res 9287 } 9288 define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 9289 ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0: 9290 ; GENERIC: # %bb.0: 9291 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9292 ; GENERIC-NEXT: retq # sched: [1:1.00] 9293 ; 9294 ; SKX-LABEL: test_16xfloat_shuff_mem_mask0: 9295 ; SKX: # %bb.0: 9296 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9297 ; SKX-NEXT: retq # sched: [7:1.00] 9298 %vec2 = load <16 x float>, <16 x float>* %vec2p 9299 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 
26, i32 27, i32 20, i32 21, i32 22, i32 23> 9300 ret <16 x float> %res 9301 } 9302 define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9303 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: 9304 ; GENERIC: # %bb.0: 9305 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9306 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9307 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9308 ; GENERIC-NEXT: retq # sched: [1:1.00] 9309 ; 9310 ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask0: 9311 ; SKX: # %bb.0: 9312 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9313 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9314 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9315 ; SKX-NEXT: retq # sched: [7:1.00] 9316 %vec2 = load <16 x float>, <16 x float>* %vec2p 9317 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9318 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9319 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9320 ret <16 x float> %res 9321 } 9322 9323 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 9324 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: 9325 ; GENERIC: # %bb.0: 9326 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 9327 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9328 ; GENERIC-NEXT: retq # sched: [1:1.00] 9329 ; 9330 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: 9331 ; SKX: # %bb.0: 9332 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9333 
; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9334 ; SKX-NEXT: retq # sched: [7:1.00] 9335 %vec2 = load <16 x float>, <16 x float>* %vec2p 9336 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9337 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9338 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9339 ret <16 x float> %res 9340 } 9341 9342 define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9343 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: 9344 ; GENERIC: # %bb.0: 9345 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9346 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9347 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9348 ; GENERIC-NEXT: retq # sched: [1:1.00] 9349 ; 9350 ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask1: 9351 ; SKX: # %bb.0: 9352 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9353 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9354 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9355 ; SKX-NEXT: retq # sched: [7:1.00] 9356 %vec2 = load <16 x float>, <16 x float>* %vec2p 9357 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9358 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9359 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9360 ret <16 x float> %res 9361 } 9362 9363 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, 
<16 x i32> %mask) { 9364 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: 9365 ; GENERIC: # %bb.0: 9366 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 9367 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] 9368 ; GENERIC-NEXT: retq # sched: [1:1.00] 9369 ; 9370 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: 9371 ; SKX: # %bb.0: 9372 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9373 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] 9374 ; SKX-NEXT: retq # sched: [7:1.00] 9375 %vec2 = load <16 x float>, <16 x float>* %vec2p 9376 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 9377 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9378 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9379 ret <16 x float> %res 9380 } 9381 9382 define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9383 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: 9384 ; GENERIC: # %bb.0: 9385 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9386 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] 9387 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9388 ; GENERIC-NEXT: retq # sched: [1:1.00] 9389 ; 9390 ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask2: 9391 ; SKX: # %bb.0: 9392 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9393 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] 9394 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9395 ; SKX-NEXT: retq # sched: [7:1.00] 9396 %vec2 = load <16 x float>, <16 x 
float>* %vec2p 9397 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27> 9398 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9399 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9400 ret <16 x float> %res 9401 } 9402 9403 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 9404 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: 9405 ; GENERIC: # %bb.0: 9406 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 9407 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] 9408 ; GENERIC-NEXT: retq # sched: [1:1.00] 9409 ; 9410 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: 9411 ; SKX: # %bb.0: 9412 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9413 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] 9414 ; SKX-NEXT: retq # sched: [7:1.00] 9415 %vec2 = load <16 x float>, <16 x float>* %vec2p 9416 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27> 9417 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9418 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9419 ret <16 x float> %res 9420 } 9421 9422 define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 9423 ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3: 9424 ; GENERIC: # %bb.0: 9425 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 9426 ; GENERIC-NEXT: retq # sched: [1:1.00] 9427 ; 9428 ; SKX-LABEL: test_16xfloat_shuff_mem_mask3: 9429 ; SKX: # %bb.0: 9430 ; 
SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 9431 ; SKX-NEXT: retq # sched: [7:1.00] 9432 %vec2 = load <16 x float>, <16 x float>* %vec2p 9433 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 9434 ret <16 x float> %res 9435 } 9436 define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 9437 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: 9438 ; GENERIC: # %bb.0: 9439 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 9440 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 9441 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 9442 ; GENERIC-NEXT: retq # sched: [1:1.00] 9443 ; 9444 ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask3: 9445 ; SKX: # %bb.0: 9446 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 9447 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 9448 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 9449 ; SKX-NEXT: retq # sched: [7:1.00] 9450 %vec2 = load <16 x float>, <16 x float>* %vec2p 9451 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 9452 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9453 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 9454 ret <16 x float> %res 9455 } 9456 9457 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 9458 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: 9459 ; GENERIC: # %bb.0: 9460 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, 
%k1 # sched: [1:0.33] 9461 ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 9462 ; GENERIC-NEXT: retq # sched: [1:1.00] 9463 ; 9464 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: 9465 ; SKX: # %bb.0: 9466 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 9467 ; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 9468 ; SKX-NEXT: retq # sched: [7:1.00] 9469 %vec2 = load <16 x float>, <16 x float>* %vec2p 9470 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 9471 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 9472 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 9473 ret <16 x float> %res 9474 } 9475 9476 define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) { 9477 ; GENERIC-LABEL: test_4xdouble_shuff_mask0: 9478 ; GENERIC: # %bb.0: 9479 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9480 ; GENERIC-NEXT: retq # sched: [1:1.00] 9481 ; 9482 ; SKX-LABEL: test_4xdouble_shuff_mask0: 9483 ; SKX: # %bb.0: 9484 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9485 ; SKX-NEXT: retq # sched: [7:1.00] 9486 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9487 ret <4 x double> %res 9488 } 9489 define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9490 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0: 9491 ; GENERIC: # %bb.0: 9492 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9493 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9494 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 
9495 ; GENERIC-NEXT: retq # sched: [1:1.00] 9496 ; 9497 ; SKX-LABEL: test_4xdouble_masked_shuff_mask0: 9498 ; SKX: # %bb.0: 9499 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9500 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9501 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9502 ; SKX-NEXT: retq # sched: [7:1.00] 9503 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9504 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9505 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9506 ret <4 x double> %res 9507 } 9508 9509 define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9510 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0: 9511 ; GENERIC: # %bb.0: 9512 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9513 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9514 ; GENERIC-NEXT: retq # sched: [1:1.00] 9515 ; 9516 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask0: 9517 ; SKX: # %bb.0: 9518 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9519 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9520 ; SKX-NEXT: retq # sched: [7:1.00] 9521 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9522 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9523 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9524 ret <4 x double> %res 9525 } 9526 define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9527 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1: 9528 ; GENERIC: # %bb.0: 9529 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9530 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9531 ; 
GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 9532 ; GENERIC-NEXT: retq # sched: [1:1.00] 9533 ; 9534 ; SKX-LABEL: test_4xdouble_masked_shuff_mask1: 9535 ; SKX: # %bb.0: 9536 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9537 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9538 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9539 ; SKX-NEXT: retq # sched: [7:1.00] 9540 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9541 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9542 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9543 ret <4 x double> %res 9544 } 9545 9546 define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9547 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1: 9548 ; GENERIC: # %bb.0: 9549 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9550 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 9551 ; GENERIC-NEXT: retq # sched: [1:1.00] 9552 ; 9553 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask1: 9554 ; SKX: # %bb.0: 9555 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9556 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 9557 ; SKX-NEXT: retq # sched: [7:1.00] 9558 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9559 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9560 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9561 ret <4 x double> %res 9562 } 9563 define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9564 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2: 9565 ; GENERIC: # %bb.0: 9566 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9567 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} 
= ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9568 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 9569 ; GENERIC-NEXT: retq # sched: [1:1.00] 9570 ; 9571 ; SKX-LABEL: test_4xdouble_masked_shuff_mask2: 9572 ; SKX: # %bb.0: 9573 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9574 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9575 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9576 ; SKX-NEXT: retq # sched: [7:1.00] 9577 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9578 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9579 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9580 ret <4 x double> %res 9581 } 9582 9583 define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9584 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2: 9585 ; GENERIC: # %bb.0: 9586 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9587 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9588 ; GENERIC-NEXT: retq # sched: [1:1.00] 9589 ; 9590 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask2: 9591 ; SKX: # %bb.0: 9592 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9593 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9594 ; SKX-NEXT: retq # sched: [7:1.00] 9595 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9596 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9597 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9598 ret <4 x double> %res 9599 } 9600 define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) { 9601 ; GENERIC-LABEL: test_4xdouble_shuff_mask3: 9602 ; GENERIC: # %bb.0: 9603 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9604 ; GENERIC-NEXT: retq # sched: 
[1:1.00] 9605 ; 9606 ; SKX-LABEL: test_4xdouble_shuff_mask3: 9607 ; SKX: # %bb.0: 9608 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9609 ; SKX-NEXT: retq # sched: [7:1.00] 9610 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9611 ret <4 x double> %res 9612 } 9613 define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 9614 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3: 9615 ; GENERIC: # %bb.0: 9616 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 9617 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9618 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 9619 ; GENERIC-NEXT: retq # sched: [1:1.00] 9620 ; 9621 ; SKX-LABEL: test_4xdouble_masked_shuff_mask3: 9622 ; SKX: # %bb.0: 9623 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 9624 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9625 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 9626 ; SKX-NEXT: retq # sched: [7:1.00] 9627 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9628 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9629 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9630 ret <4 x double> %res 9631 } 9632 9633 define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 9634 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3: 9635 ; GENERIC: # %bb.0: 9636 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9637 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 9638 ; GENERIC-NEXT: retq # sched: [1:1.00] 9639 ; 9640 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask3: 9641 ; SKX: # %bb.0: 9642 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9643 ; SKX-NEXT: 
vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 9644 ; SKX-NEXT: retq # sched: [7:1.00] 9645 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9646 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9647 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9648 ret <4 x double> %res 9649 } 9650 define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { 9651 ; GENERIC-LABEL: test_4xdouble_shuff_mem_mask0: 9652 ; GENERIC: # %bb.0: 9653 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 9654 ; GENERIC-NEXT: retq # sched: [1:1.00] 9655 ; 9656 ; SKX-LABEL: test_4xdouble_shuff_mem_mask0: 9657 ; SKX: # %bb.0: 9658 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 9659 ; SKX-NEXT: retq # sched: [7:1.00] 9660 %vec2 = load <4 x double>, <4 x double>* %vec2p 9661 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9662 ret <4 x double> %res 9663 } 9664 define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9665 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0: 9666 ; GENERIC: # %bb.0: 9667 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9668 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 9669 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9670 ; GENERIC-NEXT: retq # sched: [1:1.00] 9671 ; 9672 ; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask0: 9673 ; SKX: # %bb.0: 9674 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9675 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] 9676 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9677 ; SKX-NEXT: retq # sched: [7:1.00] 9678 %vec2 = load <4 x double>, <4 x double>* %vec2p 9679 %shuf = shufflevector <4 x double> 
%vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9680 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9681 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9682 ret <4 x double> %res 9683 } 9684 9685 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9686 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: 9687 ; GENERIC: # %bb.0: 9688 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9689 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] 9690 ; GENERIC-NEXT: retq # sched: [1:1.00] 9691 ; 9692 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: 9693 ; SKX: # %bb.0: 9694 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9695 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 9696 ; SKX-NEXT: retq # sched: [7:1.00] 9697 %vec2 = load <4 x double>, <4 x double>* %vec2p 9698 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9699 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9700 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9701 ret <4 x double> %res 9702 } 9703 9704 define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9705 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1: 9706 ; GENERIC: # %bb.0: 9707 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9708 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] 9709 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9710 ; GENERIC-NEXT: retq # sched: [1:1.00] 9711 ; 9712 ; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask1: 9713 ; SKX: # %bb.0: 9714 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9715 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9716 ; 
SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9717 ; SKX-NEXT: retq # sched: [7:1.00] 9718 %vec2 = load <4 x double>, <4 x double>* %vec2p 9719 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9720 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9721 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9722 ret <4 x double> %res 9723 } 9724 9725 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9726 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: 9727 ; GENERIC: # %bb.0: 9728 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9729 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 9730 ; GENERIC-NEXT: retq # sched: [1:1.00] 9731 ; 9732 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: 9733 ; SKX: # %bb.0: 9734 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9735 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9736 ; SKX-NEXT: retq # sched: [7:1.00] 9737 %vec2 = load <4 x double>, <4 x double>* %vec2p 9738 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9739 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9740 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9741 ret <4 x double> %res 9742 } 9743 9744 define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9745 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2: 9746 ; GENERIC: # %bb.0: 9747 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9748 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] 9749 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9750 ; GENERIC-NEXT: retq # sched: [1:1.00] 9751 ; 9752 ; SKX-LABEL: 
test_4xdouble_masked_shuff_mem_mask2: 9753 ; SKX: # %bb.0: 9754 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9755 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9756 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9757 ; SKX-NEXT: retq # sched: [7:1.00] 9758 %vec2 = load <4 x double>, <4 x double>* %vec2p 9759 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9760 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9761 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9762 ret <4 x double> %res 9763 } 9764 9765 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9766 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: 9767 ; GENERIC: # %bb.0: 9768 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9769 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 9770 ; GENERIC-NEXT: retq # sched: [1:1.00] 9771 ; 9772 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: 9773 ; SKX: # %bb.0: 9774 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9775 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 9776 ; SKX-NEXT: retq # sched: [7:1.00] 9777 %vec2 = load <4 x double>, <4 x double>* %vec2p 9778 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 9779 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9780 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9781 ret <4 x double> %res 9782 } 9783 9784 define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { 9785 ; GENERIC-LABEL: test_4xdouble_shuff_mem_mask3: 9786 ; GENERIC: # %bb.0: 9787 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 9788 ; GENERIC-NEXT: retq # sched: [1:1.00] 9789 ; 9790 ; SKX-LABEL: 
test_4xdouble_shuff_mem_mask3: 9791 ; SKX: # %bb.0: 9792 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 9793 ; SKX-NEXT: retq # sched: [7:1.00] 9794 %vec2 = load <4 x double>, <4 x double>* %vec2p 9795 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9796 ret <4 x double> %res 9797 } 9798 define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 9799 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3: 9800 ; GENERIC: # %bb.0: 9801 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 9802 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 9803 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 9804 ; GENERIC-NEXT: retq # sched: [1:1.00] 9805 ; 9806 ; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask3: 9807 ; SKX: # %bb.0: 9808 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 9809 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] 9810 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 9811 ; SKX-NEXT: retq # sched: [7:1.00] 9812 %vec2 = load <4 x double>, <4 x double>* %vec2p 9813 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9814 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9815 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 9816 ret <4 x double> %res 9817 } 9818 9819 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 9820 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: 9821 ; GENERIC: # %bb.0: 9822 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 9823 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] 9824 ; GENERIC-NEXT: retq # sched: [1:1.00] 9825 ; 9826 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: 
9827 ; SKX: # %bb.0: 9828 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 9829 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 9830 ; SKX-NEXT: retq # sched: [7:1.00] 9831 %vec2 = load <4 x double>, <4 x double>* %vec2p 9832 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 9833 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 9834 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 9835 ret <4 x double> %res 9836 } 9837 9838 define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) { 9839 ; GENERIC-LABEL: test_8xdouble_shuff_mask0: 9840 ; GENERIC: # %bb.0: 9841 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] 9842 ; GENERIC-NEXT: retq # sched: [1:1.00] 9843 ; 9844 ; SKX-LABEL: test_8xdouble_shuff_mask0: 9845 ; SKX: # %bb.0: 9846 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] 9847 ; SKX-NEXT: retq # sched: [7:1.00] 9848 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9> 9849 ret <8 x double> %res 9850 } 9851 define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9852 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0: 9853 ; GENERIC: # %bb.0: 9854 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9855 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] 9856 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9857 ; GENERIC-NEXT: retq # sched: [1:1.00] 9858 ; 9859 ; SKX-LABEL: test_8xdouble_masked_shuff_mask0: 9860 ; SKX: # %bb.0: 9861 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9862 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] 9863 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: 
[1:0.33] 9864 ; SKX-NEXT: retq # sched: [7:1.00] 9865 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9> 9866 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9867 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9868 ret <8 x double> %res 9869 } 9870 9871 define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9872 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0: 9873 ; GENERIC: # %bb.0: 9874 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9875 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] 9876 ; GENERIC-NEXT: retq # sched: [1:1.00] 9877 ; 9878 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask0: 9879 ; SKX: # %bb.0: 9880 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 9881 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] 9882 ; SKX-NEXT: retq # sched: [7:1.00] 9883 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9> 9884 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9885 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 9886 ret <8 x double> %res 9887 } 9888 define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9889 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1: 9890 ; GENERIC: # %bb.0: 9891 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9892 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] 9893 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9894 ; GENERIC-NEXT: retq # sched: [1:1.00] 9895 ; 9896 ; SKX-LABEL: test_8xdouble_masked_shuff_mask1: 9897 ; SKX: # %bb.0: 9898 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9899 
; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] 9900 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 9901 ; SKX-NEXT: retq # sched: [7:1.00] 9902 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13> 9903 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9904 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9905 ret <8 x double> %res 9906 } 9907 9908 define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9909 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1: 9910 ; GENERIC: # %bb.0: 9911 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9912 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] 9913 ; GENERIC-NEXT: retq # sched: [1:1.00] 9914 ; 9915 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask1: 9916 ; SKX: # %bb.0: 9917 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 9918 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] 9919 ; SKX-NEXT: retq # sched: [7:1.00] 9920 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13> 9921 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9922 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 9923 ret <8 x double> %res 9924 } 9925 define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9926 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2: 9927 ; GENERIC: # %bb.0: 9928 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9929 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] 9930 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9931 ; GENERIC-NEXT: retq # sched: [1:1.00] 9932 ; 9933 
; SKX-LABEL: test_8xdouble_masked_shuff_mask2: 9934 ; SKX: # %bb.0: 9935 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9936 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] 9937 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 9938 ; SKX-NEXT: retq # sched: [7:1.00] 9939 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9> 9940 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9941 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9942 ret <8 x double> %res 9943 } 9944 9945 define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9946 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2: 9947 ; GENERIC: # %bb.0: 9948 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9949 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] 9950 ; GENERIC-NEXT: retq # sched: [1:1.00] 9951 ; 9952 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask2: 9953 ; SKX: # %bb.0: 9954 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 9955 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] 9956 ; SKX-NEXT: retq # sched: [7:1.00] 9957 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9> 9958 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9959 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 9960 ret <8 x double> %res 9961 } 9962 define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) { 9963 ; GENERIC-LABEL: test_8xdouble_shuff_mask3: 9964 ; GENERIC: # %bb.0: 9965 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] 9966 ; GENERIC-NEXT: retq # sched: [1:1.00] 9967 ; 9968 ; SKX-LABEL: test_8xdouble_shuff_mask3: 9969 ; 
SKX: # %bb.0: 9970 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] 9971 ; SKX-NEXT: retq # sched: [7:1.00] 9972 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11> 9973 ret <8 x double> %res 9974 } 9975 define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 9976 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3: 9977 ; GENERIC: # %bb.0: 9978 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 9979 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] 9980 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 9981 ; GENERIC-NEXT: retq # sched: [1:1.00] 9982 ; 9983 ; SKX-LABEL: test_8xdouble_masked_shuff_mask3: 9984 ; SKX: # %bb.0: 9985 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 9986 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] 9987 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 9988 ; SKX-NEXT: retq # sched: [7:1.00] 9989 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11> 9990 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 9991 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 9992 ret <8 x double> %res 9993 } 9994 9995 define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 9996 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3: 9997 ; GENERIC: # %bb.0: 9998 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 9999 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] 10000 ; GENERIC-NEXT: retq # sched: [1:1.00] 10001 ; 10002 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask3: 10003 ; SKX: # %bb.0: 10004 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # 
sched: [3:1.00] 10005 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] 10006 ; SKX-NEXT: retq # sched: [7:1.00] 10007 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11> 10008 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10009 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10010 ret <8 x double> %res 10011 } 10012 define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { 10013 ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0: 10014 ; GENERIC: # %bb.0: 10015 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] 10016 ; GENERIC-NEXT: retq # sched: [1:1.00] 10017 ; 10018 ; SKX-LABEL: test_8xdouble_shuff_mem_mask0: 10019 ; SKX: # %bb.0: 10020 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] 10021 ; SKX-NEXT: retq # sched: [7:1.00] 10022 %vec2 = load <8 x double>, <8 x double>* %vec2p 10023 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 10024 ret <8 x double> %res 10025 } 10026 define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10027 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: 10028 ; GENERIC: # %bb.0: 10029 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10030 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] 10031 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10032 ; GENERIC-NEXT: retq # sched: [1:1.00] 10033 ; 10034 ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask0: 10035 ; SKX: # %bb.0: 10036 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10037 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] 10038 ; SKX-NEXT: vmovapd 
%zmm1, %zmm0 # sched: [1:0.33] 10039 ; SKX-NEXT: retq # sched: [7:1.00] 10040 %vec2 = load <8 x double>, <8 x double>* %vec2p 10041 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 10042 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10043 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10044 ret <8 x double> %res 10045 } 10046 10047 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10048 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: 10049 ; GENERIC: # %bb.0: 10050 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10051 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] 10052 ; GENERIC-NEXT: retq # sched: [1:1.00] 10053 ; 10054 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: 10055 ; SKX: # %bb.0: 10056 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10057 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] 10058 ; SKX-NEXT: retq # sched: [7:1.00] 10059 %vec2 = load <8 x double>, <8 x double>* %vec2p 10060 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 10061 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10062 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10063 ret <8 x double> %res 10064 } 10065 10066 define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10067 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: 10068 ; GENERIC: # %bb.0: 10069 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10070 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] 10071 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10072 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 10073 ; 10074 ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask1: 10075 ; SKX: # %bb.0: 10076 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10077 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] 10078 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 10079 ; SKX-NEXT: retq # sched: [7:1.00] 10080 %vec2 = load <8 x double>, <8 x double>* %vec2p 10081 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10082 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10083 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10084 ret <8 x double> %res 10085 } 10086 10087 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10088 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: 10089 ; GENERIC: # %bb.0: 10090 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10091 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] 10092 ; GENERIC-NEXT: retq # sched: [1:1.00] 10093 ; 10094 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: 10095 ; SKX: # %bb.0: 10096 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10097 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] 10098 ; SKX-NEXT: retq # sched: [7:1.00] 10099 %vec2 = load <8 x double>, <8 x double>* %vec2p 10100 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10101 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10102 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10103 ret <8 x double> %res 10104 } 10105 10106 define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10107 ; 
GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: 10108 ; GENERIC: # %bb.0: 10109 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10110 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] 10111 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10112 ; GENERIC-NEXT: retq # sched: [1:1.00] 10113 ; 10114 ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask2: 10115 ; SKX: # %bb.0: 10116 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10117 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] 10118 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 10119 ; SKX-NEXT: retq # sched: [7:1.00] 10120 %vec2 = load <8 x double>, <8 x double>* %vec2p 10121 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13> 10122 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10123 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10124 ret <8 x double> %res 10125 } 10126 10127 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10128 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: 10129 ; GENERIC: # %bb.0: 10130 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10131 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] 10132 ; GENERIC-NEXT: retq # sched: [1:1.00] 10133 ; 10134 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: 10135 ; SKX: # %bb.0: 10136 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10137 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] 10138 ; SKX-NEXT: retq # sched: [7:1.00] 10139 %vec2 = load <8 x double>, <8 x double>* %vec2p 10140 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13> 10141 %cmp = icmp 
eq <8 x i64> %mask, zeroinitializer 10142 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10143 ret <8 x double> %res 10144 } 10145 10146 define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { 10147 ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3: 10148 ; GENERIC: # %bb.0: 10149 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] 10150 ; GENERIC-NEXT: retq # sched: [1:1.00] 10151 ; 10152 ; SKX-LABEL: test_8xdouble_shuff_mem_mask3: 10153 ; SKX: # %bb.0: 10154 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] 10155 ; SKX-NEXT: retq # sched: [7:1.00] 10156 %vec2 = load <8 x double>, <8 x double>* %vec2p 10157 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9> 10158 ret <8 x double> %res 10159 } 10160 define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 10161 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: 10162 ; GENERIC: # %bb.0: 10163 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 10164 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] 10165 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 10166 ; GENERIC-NEXT: retq # sched: [1:1.00] 10167 ; 10168 ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask3: 10169 ; SKX: # %bb.0: 10170 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 10171 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] 10172 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 10173 ; SKX-NEXT: retq # sched: [7:1.00] 10174 %vec2 = load <8 x double>, <8 x double>* %vec2p 10175 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9> 10176 %cmp = icmp eq <8 x i64> %mask, 
zeroinitializer 10177 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 10178 ret <8 x double> %res 10179 } 10180 10181 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 10182 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: 10183 ; GENERIC: # %bb.0: 10184 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 10185 ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] 10186 ; GENERIC-NEXT: retq # sched: [1:1.00] 10187 ; 10188 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: 10189 ; SKX: # %bb.0: 10190 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 10191 ; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] 10192 ; SKX-NEXT: retq # sched: [7:1.00] 10193 %vec2 = load <8 x double>, <8 x double>* %vec2p 10194 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9> 10195 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 10196 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 10197 ret <8 x double> %res 10198 } 10199 10200 define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) { 10201 ; GENERIC-LABEL: test_8xi32_shuff_mask0: 10202 ; GENERIC: # %bb.0: 10203 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 10204 ; GENERIC-NEXT: retq # sched: [1:1.00] 10205 ; 10206 ; SKX-LABEL: test_8xi32_shuff_mask0: 10207 ; SKX: # %bb.0: 10208 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 10209 ; SKX-NEXT: retq # sched: [7:1.00] 10210 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10211 ret <8 x i32> %res 10212 } 10213 define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> 
%mask) { 10214 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask0: 10215 ; GENERIC: # %bb.0: 10216 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 10217 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10218 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10219 ; GENERIC-NEXT: retq # sched: [1:1.00] 10220 ; 10221 ; SKX-LABEL: test_8xi32_masked_shuff_mask0: 10222 ; SKX: # %bb.0: 10223 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10224 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10225 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10226 ; SKX-NEXT: retq # sched: [7:1.00] 10227 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10228 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10229 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10230 ret <8 x i32> %res 10231 } 10232 10233 define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10234 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0: 10235 ; GENERIC: # %bb.0: 10236 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10237 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10238 ; GENERIC-NEXT: retq # sched: [1:1.00] 10239 ; 10240 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask0: 10241 ; SKX: # %bb.0: 10242 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10243 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10244 ; SKX-NEXT: retq # sched: [7:1.00] 10245 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10246 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10247 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10248 ret <8 x i32> %res 10249 } 10250 
define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { 10251 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask1: 10252 ; GENERIC: # %bb.0: 10253 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 10254 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10255 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10256 ; GENERIC-NEXT: retq # sched: [1:1.00] 10257 ; 10258 ; SKX-LABEL: test_8xi32_masked_shuff_mask1: 10259 ; SKX: # %bb.0: 10260 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10261 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10262 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10263 ; SKX-NEXT: retq # sched: [7:1.00] 10264 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10265 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10266 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10267 ret <8 x i32> %res 10268 } 10269 10270 define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10271 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1: 10272 ; GENERIC: # %bb.0: 10273 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10274 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10275 ; GENERIC-NEXT: retq # sched: [1:1.00] 10276 ; 10277 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask1: 10278 ; SKX: # %bb.0: 10279 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10280 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10281 ; SKX-NEXT: retq # sched: [7:1.00] 10282 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10283 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10284 %res = 
select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10285 ret <8 x i32> %res 10286 } 10287 define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { 10288 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask2: 10289 ; GENERIC: # %bb.0: 10290 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 10291 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10292 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10293 ; GENERIC-NEXT: retq # sched: [1:1.00] 10294 ; 10295 ; SKX-LABEL: test_8xi32_masked_shuff_mask2: 10296 ; SKX: # %bb.0: 10297 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10298 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10299 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10300 ; SKX-NEXT: retq # sched: [7:1.00] 10301 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10302 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10303 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10304 ret <8 x i32> %res 10305 } 10306 10307 define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10308 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2: 10309 ; GENERIC: # %bb.0: 10310 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10311 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] 10312 ; GENERIC-NEXT: retq # sched: [1:1.00] 10313 ; 10314 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask2: 10315 ; SKX: # %bb.0: 10316 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10317 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] 10318 ; SKX-NEXT: retq # sched: [7:1.00] 10319 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 
6, i32 7, i32 12, i32 13, i32 14, i32 15> 10320 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10321 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10322 ret <8 x i32> %res 10323 } 10324 define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) { 10325 ; GENERIC-LABEL: test_8xi32_shuff_mask3: 10326 ; GENERIC: # %bb.0: 10327 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10328 ; GENERIC-NEXT: retq # sched: [1:1.00] 10329 ; 10330 ; SKX-LABEL: test_8xi32_shuff_mask3: 10331 ; SKX: # %bb.0: 10332 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10333 ; SKX-NEXT: retq # sched: [7:1.00] 10334 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10335 ret <8 x i32> %res 10336 } 10337 define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { 10338 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask3: 10339 ; GENERIC: # %bb.0: 10340 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 10341 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10342 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10343 ; GENERIC-NEXT: retq # sched: [1:1.00] 10344 ; 10345 ; SKX-LABEL: test_8xi32_masked_shuff_mask3: 10346 ; SKX: # %bb.0: 10347 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 10348 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10349 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10350 ; SKX-NEXT: retq # sched: [7:1.00] 10351 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10352 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10353 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10354 ret <8 x i32> %res 10355 } 10356 10357 define <8 x i32> 
@test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { 10358 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3: 10359 ; GENERIC: # %bb.0: 10360 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10361 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] 10362 ; GENERIC-NEXT: retq # sched: [1:1.00] 10363 ; 10364 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask3: 10365 ; SKX: # %bb.0: 10366 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10367 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] 10368 ; SKX-NEXT: retq # sched: [7:1.00] 10369 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10370 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10371 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10372 ret <8 x i32> %res 10373 } 10374 define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { 10375 ; GENERIC-LABEL: test_8xi32_shuff_mem_mask0: 10376 ; GENERIC: # %bb.0: 10377 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 10378 ; GENERIC-NEXT: retq # sched: [1:1.00] 10379 ; 10380 ; SKX-LABEL: test_8xi32_shuff_mem_mask0: 10381 ; SKX: # %bb.0: 10382 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 10383 ; SKX-NEXT: retq # sched: [7:1.00] 10384 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10385 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10386 ret <8 x i32> %res 10387 } 10388 define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10389 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0: 10390 ; GENERIC: # %bb.0: 10391 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10392 ; GENERIC-NEXT: vshufi32x4 
{{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 10393 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10394 ; GENERIC-NEXT: retq # sched: [1:1.00] 10395 ; 10396 ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask0: 10397 ; SKX: # %bb.0: 10398 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10399 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 10400 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10401 ; SKX-NEXT: retq # sched: [7:1.00] 10402 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10403 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10404 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10405 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10406 ret <8 x i32> %res 10407 } 10408 10409 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 10410 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: 10411 ; GENERIC: # %bb.0: 10412 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10413 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] 10414 ; GENERIC-NEXT: retq # sched: [1:1.00] 10415 ; 10416 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: 10417 ; SKX: # %bb.0: 10418 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10419 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] 10420 ; SKX-NEXT: retq # sched: [7:1.00] 10421 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10422 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 10423 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10424 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10425 ret <8 x i32> %res 10426 } 10427 10428 define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 
x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10429 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1: 10430 ; GENERIC: # %bb.0: 10431 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10432 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10433 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10434 ; GENERIC-NEXT: retq # sched: [1:1.00] 10435 ; 10436 ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask1: 10437 ; SKX: # %bb.0: 10438 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10439 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10440 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10441 ; SKX-NEXT: retq # sched: [7:1.00] 10442 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10443 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10444 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10445 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10446 ret <8 x i32> %res 10447 } 10448 10449 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 10450 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: 10451 ; GENERIC: # %bb.0: 10452 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10453 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10454 ; GENERIC-NEXT: retq # sched: [1:1.00] 10455 ; 10456 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: 10457 ; SKX: # %bb.0: 10458 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10459 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10460 ; SKX-NEXT: retq # sched: [7:1.00] 10461 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10462 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10463 %cmp = 
icmp eq <8 x i32> %mask, zeroinitializer 10464 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10465 ret <8 x i32> %res 10466 } 10467 10468 define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10469 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2: 10470 ; GENERIC: # %bb.0: 10471 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10472 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10473 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10474 ; GENERIC-NEXT: retq # sched: [1:1.00] 10475 ; 10476 ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask2: 10477 ; SKX: # %bb.0: 10478 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10479 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10480 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10481 ; SKX-NEXT: retq # sched: [7:1.00] 10482 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10483 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10484 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10485 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10486 ret <8 x i32> %res 10487 } 10488 10489 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 10490 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: 10491 ; GENERIC: # %bb.0: 10492 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10493 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10494 ; GENERIC-NEXT: retq # sched: [1:1.00] 10495 ; 10496 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: 10497 ; SKX: # %bb.0: 10498 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10499 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 
10500 ; SKX-NEXT: retq # sched: [7:1.00] 10501 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10502 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10503 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10504 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10505 ret <8 x i32> %res 10506 } 10507 10508 define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { 10509 ; GENERIC-LABEL: test_8xi32_shuff_mem_mask3: 10510 ; GENERIC: # %bb.0: 10511 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] 10512 ; GENERIC-NEXT: retq # sched: [1:1.00] 10513 ; 10514 ; SKX-LABEL: test_8xi32_shuff_mem_mask3: 10515 ; SKX: # %bb.0: 10516 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 10517 ; SKX-NEXT: retq # sched: [7:1.00] 10518 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10519 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10520 ret <8 x i32> %res 10521 } 10522 define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { 10523 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3: 10524 ; GENERIC: # %bb.0: 10525 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 10526 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10527 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 10528 ; GENERIC-NEXT: retq # sched: [1:1.00] 10529 ; 10530 ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask3: 10531 ; SKX: # %bb.0: 10532 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 10533 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10534 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 10535 ; SKX-NEXT: retq # sched: [7:1.00] 10536 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10537 %shuf = shufflevector 
<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10538 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10539 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 10540 ret <8 x i32> %res 10541 } 10542 10543 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { 10544 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: 10545 ; GENERIC: # %bb.0: 10546 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 10547 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] 10548 ; GENERIC-NEXT: retq # sched: [1:1.00] 10549 ; 10550 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: 10551 ; SKX: # %bb.0: 10552 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 10553 ; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] 10554 ; SKX-NEXT: retq # sched: [7:1.00] 10555 %vec2 = load <8 x i32>, <8 x i32>* %vec2p 10556 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 10557 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 10558 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 10559 ret <8 x i32> %res 10560 } 10561 10562 define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) { 10563 ; GENERIC-LABEL: test_16xi32_shuff_mask0: 10564 ; GENERIC: # %bb.0: 10565 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 10566 ; GENERIC-NEXT: retq # sched: [1:1.00] 10567 ; 10568 ; SKX-LABEL: test_16xi32_shuff_mask0: 10569 ; SKX: # %bb.0: 10570 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 10571 ; SKX-NEXT: retq # sched: [7:1.00] 10572 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 
7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10573 ret <16 x i32> %res 10574 } 10575 define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10576 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask0: 10577 ; GENERIC: # %bb.0: 10578 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10579 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 10580 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10581 ; GENERIC-NEXT: retq # sched: [1:1.00] 10582 ; 10583 ; SKX-LABEL: test_16xi32_masked_shuff_mask0: 10584 ; SKX: # %bb.0: 10585 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10586 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 10587 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10588 ; SKX-NEXT: retq # sched: [7:1.00] 10589 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10590 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10591 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10592 ret <16 x i32> %res 10593 } 10594 10595 define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10596 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0: 10597 ; GENERIC: # %bb.0: 10598 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10599 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] 10600 ; GENERIC-NEXT: retq # sched: [1:1.00] 10601 ; 10602 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask0: 10603 ; SKX: # %bb.0: 10604 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10605 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = 
zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] 10606 ; SKX-NEXT: retq # sched: [7:1.00] 10607 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10608 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10609 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10610 ret <16 x i32> %res 10611 } 10612 define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10613 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask1: 10614 ; GENERIC: # %bb.0: 10615 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10616 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10617 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10618 ; GENERIC-NEXT: retq # sched: [1:1.00] 10619 ; 10620 ; SKX-LABEL: test_16xi32_masked_shuff_mask1: 10621 ; SKX: # %bb.0: 10622 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10623 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10624 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10625 ; SKX-NEXT: retq # sched: [7:1.00] 10626 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10627 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10628 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10629 ret <16 x i32> %res 10630 } 10631 10632 define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10633 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1: 10634 ; GENERIC: # %bb.0: 10635 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10636 ; GENERIC-NEXT: 
vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10637 ; GENERIC-NEXT: retq # sched: [1:1.00] 10638 ; 10639 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask1: 10640 ; SKX: # %bb.0: 10641 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10642 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10643 ; SKX-NEXT: retq # sched: [7:1.00] 10644 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10645 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10646 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10647 ret <16 x i32> %res 10648 } 10649 define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10650 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask2: 10651 ; GENERIC: # %bb.0: 10652 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10653 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] 10654 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10655 ; GENERIC-NEXT: retq # sched: [1:1.00] 10656 ; 10657 ; SKX-LABEL: test_16xi32_masked_shuff_mask2: 10658 ; SKX: # %bb.0: 10659 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10660 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] 10661 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10662 ; SKX-NEXT: retq # sched: [7:1.00] 10663 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19> 10664 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10665 %res = select <16 x i1> %cmp, <16 x i32> %shuf, 
<16 x i32> %vec3 10666 ret <16 x i32> %res 10667 } 10668 10669 define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10670 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2: 10671 ; GENERIC: # %bb.0: 10672 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10673 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] 10674 ; GENERIC-NEXT: retq # sched: [1:1.00] 10675 ; 10676 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask2: 10677 ; SKX: # %bb.0: 10678 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10679 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] 10680 ; SKX-NEXT: retq # sched: [7:1.00] 10681 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19> 10682 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10683 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10684 ret <16 x i32> %res 10685 } 10686 define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) { 10687 ; GENERIC-LABEL: test_16xi32_shuff_mask3: 10688 ; GENERIC: # %bb.0: 10689 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10690 ; GENERIC-NEXT: retq # sched: [1:1.00] 10691 ; 10692 ; SKX-LABEL: test_16xi32_shuff_mask3: 10693 ; SKX: # %bb.0: 10694 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10695 ; SKX-NEXT: retq # sched: [7:1.00] 10696 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10697 ret <16 x i32> %res 10698 } 10699 define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x 
i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { 10700 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask3: 10701 ; GENERIC: # %bb.0: 10702 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 10703 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10704 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 10705 ; GENERIC-NEXT: retq # sched: [1:1.00] 10706 ; 10707 ; SKX-LABEL: test_16xi32_masked_shuff_mask3: 10708 ; SKX: # %bb.0: 10709 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 10710 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10711 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 10712 ; SKX-NEXT: retq # sched: [7:1.00] 10713 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10714 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10715 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10716 ret <16 x i32> %res 10717 } 10718 10719 define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { 10720 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3: 10721 ; GENERIC: # %bb.0: 10722 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10723 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] 10724 ; GENERIC-NEXT: retq # sched: [1:1.00] 10725 ; 10726 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask3: 10727 ; SKX: # %bb.0: 10728 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10729 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] 10730 ; SKX-NEXT: retq # sched: [7:1.00] 10731 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, 
i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> 10732 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10733 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10734 ret <16 x i32> %res 10735 } 10736 define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) { 10737 ; GENERIC-LABEL: test_16xi32_shuff_mem_mask0: 10738 ; GENERIC: # %bb.0: 10739 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] 10740 ; GENERIC-NEXT: retq # sched: [1:1.00] 10741 ; 10742 ; SKX-LABEL: test_16xi32_shuff_mem_mask0: 10743 ; SKX: # %bb.0: 10744 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] 10745 ; SKX-NEXT: retq # sched: [7:1.00] 10746 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10747 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 10748 ret <16 x i32> %res 10749 } 10750 define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10751 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: 10752 ; GENERIC: # %bb.0: 10753 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10754 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] 10755 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10756 ; GENERIC-NEXT: retq # sched: [1:1.00] 10757 ; 10758 ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask0: 10759 ; SKX: # %bb.0: 10760 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10761 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] 10762 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10763 ; SKX-NEXT: retq # sched: [7:1.00] 10764 
%vec2 = load <16 x i32>, <16 x i32>* %vec2p 10765 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 10766 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10767 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10768 ret <16 x i32> %res 10769 } 10770 10771 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 10772 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: 10773 ; GENERIC: # %bb.0: 10774 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10775 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] 10776 ; GENERIC-NEXT: retq # sched: [1:1.00] 10777 ; 10778 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: 10779 ; SKX: # %bb.0: 10780 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10781 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] 10782 ; SKX-NEXT: retq # sched: [7:1.00] 10783 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10784 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19> 10785 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10786 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10787 ret <16 x i32> %res 10788 } 10789 10790 define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10791 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: 10792 ; GENERIC: # %bb.0: 10793 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10794 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] 
10795 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10796 ; GENERIC-NEXT: retq # sched: [1:1.00] 10797 ; 10798 ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask1: 10799 ; SKX: # %bb.0: 10800 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10801 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] 10802 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10803 ; SKX-NEXT: retq # sched: [7:1.00] 10804 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10805 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 10806 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10807 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10808 ret <16 x i32> %res 10809 } 10810 10811 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 10812 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: 10813 ; GENERIC: # %bb.0: 10814 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10815 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] 10816 ; GENERIC-NEXT: retq # sched: [1:1.00] 10817 ; 10818 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: 10819 ; SKX: # %bb.0: 10820 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10821 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] 10822 ; SKX-NEXT: retq # sched: [7:1.00] 10823 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10824 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> 10825 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10826 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 
x i32> zeroinitializer 10827 ret <16 x i32> %res 10828 } 10829 10830 define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10831 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: 10832 ; GENERIC: # %bb.0: 10833 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10834 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 10835 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10836 ; GENERIC-NEXT: retq # sched: [1:1.00] 10837 ; 10838 ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask2: 10839 ; SKX: # %bb.0: 10840 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10841 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 10842 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10843 ; SKX-NEXT: retq # sched: [7:1.00] 10844 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10845 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 10846 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10847 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10848 ret <16 x i32> %res 10849 } 10850 10851 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 10852 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: 10853 ; GENERIC: # %bb.0: 10854 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10855 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] 10856 ; GENERIC-NEXT: retq # sched: [1:1.00] 10857 ; 10858 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: 10859 ; SKX: # %bb.0: 10860 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10861 ; SKX-NEXT: 
vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] 10862 ; SKX-NEXT: retq # sched: [7:1.00] 10863 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10864 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31> 10865 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10866 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10867 ret <16 x i32> %res 10868 } 10869 10870 define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) { 10871 ; GENERIC-LABEL: test_16xi32_shuff_mem_mask3: 10872 ; GENERIC: # %bb.0: 10873 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] 10874 ; GENERIC-NEXT: retq # sched: [1:1.00] 10875 ; 10876 ; SKX-LABEL: test_16xi32_shuff_mem_mask3: 10877 ; SKX: # %bb.0: 10878 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] 10879 ; SKX-NEXT: retq # sched: [7:1.00] 10880 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10881 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10882 ret <16 x i32> %res 10883 } 10884 define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { 10885 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: 10886 ; GENERIC: # %bb.0: 10887 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 10888 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] 10889 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 10890 ; GENERIC-NEXT: retq # sched: [1:1.00] 10891 ; 10892 ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask3: 10893 ; SKX: # 
%bb.0: 10894 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 10895 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] 10896 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 10897 ; SKX-NEXT: retq # sched: [7:1.00] 10898 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10899 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10900 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10901 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 10902 ret <16 x i32> %res 10903 } 10904 10905 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { 10906 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: 10907 ; GENERIC: # %bb.0: 10908 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 10909 ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] 10910 ; GENERIC-NEXT: retq # sched: [1:1.00] 10911 ; 10912 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: 10913 ; SKX: # %bb.0: 10914 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 10915 ; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] 10916 ; SKX-NEXT: retq # sched: [7:1.00] 10917 %vec2 = load <16 x i32>, <16 x i32>* %vec2p 10918 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> 10919 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 10920 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 10921 ret <16 x i32> %res 10922 } 10923 10924 define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) { 10925 ; GENERIC-LABEL: 
test_4xi64_shuff_mask0: 10926 ; GENERIC: # %bb.0: 10927 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10928 ; GENERIC-NEXT: retq # sched: [1:1.00] 10929 ; 10930 ; SKX-LABEL: test_4xi64_shuff_mask0: 10931 ; SKX: # %bb.0: 10932 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10933 ; SKX-NEXT: retq # sched: [7:1.00] 10934 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 10935 ret <4 x i64> %res 10936 } 10937 define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 10938 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask0: 10939 ; GENERIC: # %bb.0: 10940 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 10941 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10942 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10943 ; GENERIC-NEXT: retq # sched: [1:1.00] 10944 ; 10945 ; SKX-LABEL: test_4xi64_masked_shuff_mask0: 10946 ; SKX: # %bb.0: 10947 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 10948 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10949 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10950 ; SKX-NEXT: retq # sched: [7:1.00] 10951 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 10952 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 10953 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 10954 ret <4 x i64> %res 10955 } 10956 10957 define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 10958 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0: 10959 ; GENERIC: # %bb.0: 10960 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 10961 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 10962 ; GENERIC-NEXT: retq # sched: [1:1.00] 10963 ; 10964 ; SKX-LABEL: 
test_4xi64_zero_masked_shuff_mask0: 10965 ; SKX: # %bb.0: 10966 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 10967 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 10968 ; SKX-NEXT: retq # sched: [7:1.00] 10969 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 10970 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 10971 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 10972 ret <4 x i64> %res 10973 } 10974 define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 10975 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask1: 10976 ; GENERIC: # %bb.0: 10977 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 10978 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 10979 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 10980 ; GENERIC-NEXT: retq # sched: [1:1.00] 10981 ; 10982 ; SKX-LABEL: test_4xi64_masked_shuff_mask1: 10983 ; SKX: # %bb.0: 10984 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 10985 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 10986 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 10987 ; SKX-NEXT: retq # sched: [7:1.00] 10988 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 10989 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 10990 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 10991 ret <4 x i64> %res 10992 } 10993 10994 define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 10995 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1: 10996 ; GENERIC: # %bb.0: 10997 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 10998 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 10999 ; GENERIC-NEXT: retq # sched: [1:1.00] 11000 ; 11001 
; SKX-LABEL: test_4xi64_zero_masked_shuff_mask1: 11002 ; SKX: # %bb.0: 11003 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11004 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11005 ; SKX-NEXT: retq # sched: [7:1.00] 11006 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11007 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11008 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11009 ret <4 x i64> %res 11010 } 11011 define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 11012 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask2: 11013 ; GENERIC: # %bb.0: 11014 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 11015 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 11016 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 11017 ; GENERIC-NEXT: retq # sched: [1:1.00] 11018 ; 11019 ; SKX-LABEL: test_4xi64_masked_shuff_mask2: 11020 ; SKX: # %bb.0: 11021 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 11022 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 11023 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 11024 ; SKX-NEXT: retq # sched: [7:1.00] 11025 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11026 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11027 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11028 ret <4 x i64> %res 11029 } 11030 11031 define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 11032 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2: 11033 ; GENERIC: # %bb.0: 11034 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11035 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 11036 ; GENERIC-NEXT: retq # sched: [1:1.00] 
11037 ; 11038 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask2: 11039 ; SKX: # %bb.0: 11040 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11041 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 11042 ; SKX-NEXT: retq # sched: [7:1.00] 11043 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11044 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11045 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11046 ret <4 x i64> %res 11047 } 11048 define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) { 11049 ; GENERIC-LABEL: test_4xi64_shuff_mask3: 11050 ; GENERIC: # %bb.0: 11051 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 11052 ; GENERIC-NEXT: retq # sched: [1:1.00] 11053 ; 11054 ; SKX-LABEL: test_4xi64_shuff_mask3: 11055 ; SKX: # %bb.0: 11056 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11057 ; SKX-NEXT: retq # sched: [7:1.00] 11058 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11059 ret <4 x i64> %res 11060 } 11061 define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { 11062 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask3: 11063 ; GENERIC: # %bb.0: 11064 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 11065 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 11066 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 11067 ; GENERIC-NEXT: retq # sched: [1:1.00] 11068 ; 11069 ; SKX-LABEL: test_4xi64_masked_shuff_mask3: 11070 ; SKX: # %bb.0: 11071 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 11072 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11073 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 11074 ; SKX-NEXT: retq # sched: [7:1.00] 11075 %shuf = shufflevector <4 x i64> 
%vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11076 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11077 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11078 ret <4 x i64> %res 11079 } 11080 11081 define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { 11082 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3: 11083 ; GENERIC: # %bb.0: 11084 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11085 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] 11086 ; GENERIC-NEXT: retq # sched: [1:1.00] 11087 ; 11088 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask3: 11089 ; SKX: # %bb.0: 11090 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11091 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] 11092 ; SKX-NEXT: retq # sched: [7:1.00] 11093 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11094 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11095 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11096 ret <4 x i64> %res 11097 } 11098 define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { 11099 ; GENERIC-LABEL: test_4xi64_shuff_mem_mask0: 11100 ; GENERIC: # %bb.0: 11101 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 11102 ; GENERIC-NEXT: retq # sched: [1:1.00] 11103 ; 11104 ; SKX-LABEL: test_4xi64_shuff_mem_mask0: 11105 ; SKX: # %bb.0: 11106 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 11107 ; SKX-NEXT: retq # sched: [7:1.00] 11108 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11109 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11110 ret <4 x i64> %res 11111 } 11112 define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11113 ; 
GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0: 11114 ; GENERIC: # %bb.0: 11115 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11116 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11117 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11118 ; GENERIC-NEXT: retq # sched: [1:1.00] 11119 ; 11120 ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask0: 11121 ; SKX: # %bb.0: 11122 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11123 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11124 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11125 ; SKX-NEXT: retq # sched: [7:1.00] 11126 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11127 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11128 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11129 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11130 ret <4 x i64> %res 11131 } 11132 11133 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11134 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: 11135 ; GENERIC: # %bb.0: 11136 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11137 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11138 ; GENERIC-NEXT: retq # sched: [1:1.00] 11139 ; 11140 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: 11141 ; SKX: # %bb.0: 11142 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11143 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11144 ; SKX-NEXT: retq # sched: [7:1.00] 11145 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11146 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11147 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11148 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11149 ret <4 x i64> %res 11150 } 
; Merge-masked shuffle of two <4 x i64> vectors where the second source is loaded
; from %vec2p: the inputs are shuffled with indices <2,3,4,5>, lanes where %mask is
; zero take the shuffled value, and all other lanes keep %vec3. The expected code
; builds %k1 with vptestnmq and then issues a masked vshufi64x2 with a memory operand.
; The assertions that follow are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of this file).
11151 11152 define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11153 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1: 11154 ; GENERIC: # %bb.0: 11155 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11156 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11157 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11158 ; GENERIC-NEXT: retq # sched: [1:1.00] 11159 ; 11160 ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask1: 11161 ; SKX: # %bb.0: 11162 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11163 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] 11164 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11165 ; SKX-NEXT: retq # sched: [7:1.00] 11166 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11167 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11168 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11169 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11170 ret <4 x i64> %res 11171 } 11172 11173 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11174 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: 11175 ; GENERIC: # %bb.0: 11176 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11177 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11178 ; GENERIC-NEXT: retq # sched: [1:1.00] 11179 ; 11180 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: 11181 ; SKX: # %bb.0: 11182 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11183 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 11184 ; SKX-NEXT: retq # sched: [7:1.00] 11185 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11186 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11187 %cmp = icmp eq <4 
x i64> %mask, zeroinitializer 11188 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11189 ret <4 x i64> %res 11190 } 11191 11192 define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11193 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2: 11194 ; GENERIC: # %bb.0: 11195 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11196 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11197 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11198 ; GENERIC-NEXT: retq # sched: [1:1.00] 11199 ; 11200 ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask2: 11201 ; SKX: # %bb.0: 11202 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11203 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] 11204 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11205 ; SKX-NEXT: retq # sched: [7:1.00] 11206 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11207 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11208 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11209 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11210 ret <4 x i64> %res 11211 } 11212 11213 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11214 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: 11215 ; GENERIC: # %bb.0: 11216 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11217 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] 11218 ; GENERIC-NEXT: retq # sched: [1:1.00] 11219 ; 11220 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: 11221 ; SKX: # %bb.0: 11222 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11223 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] 11224 ; SKX-NEXT: retq # sched: [7:1.00] 11225 %vec2 = load <4 x i64>, <4 
x i64>* %vec2p 11226 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 11227 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11228 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11229 ret <4 x i64> %res 11230 } 11231 11232 define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { 11233 ; GENERIC-LABEL: test_4xi64_shuff_mem_mask3: 11234 ; GENERIC: # %bb.0: 11235 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] 11236 ; GENERIC-NEXT: retq # sched: [1:1.00] 11237 ; 11238 ; SKX-LABEL: test_4xi64_shuff_mem_mask3: 11239 ; SKX: # %bb.0: 11240 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] 11241 ; SKX-NEXT: retq # sched: [7:1.00] 11242 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11243 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11244 ret <4 x i64> %res 11245 } 11246 define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { 11247 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3: 11248 ; GENERIC: # %bb.0: 11249 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 11250 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11251 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] 11252 ; GENERIC-NEXT: retq # sched: [1:1.00] 11253 ; 11254 ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask3: 11255 ; SKX: # %bb.0: 11256 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 11257 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11258 ; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] 11259 ; SKX-NEXT: retq # sched: [7:1.00] 11260 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11261 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11262 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11263 %res = select <4 
x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 11264 ret <4 x i64> %res 11265 } 11266 11267 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { 11268 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: 11269 ; GENERIC: # %bb.0: 11270 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 11271 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] 11272 ; GENERIC-NEXT: retq # sched: [1:1.00] 11273 ; 11274 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: 11275 ; SKX: # %bb.0: 11276 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 11277 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] 11278 ; SKX-NEXT: retq # sched: [7:1.00] 11279 %vec2 = load <4 x i64>, <4 x i64>* %vec2p 11280 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 11281 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 11282 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 11283 ret <4 x i64> %res 11284 } 11285 11286 define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) { 11287 ; GENERIC-LABEL: test_8xi64_shuff_mask0: 11288 ; GENERIC: # %bb.0: 11289 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] 11290 ; GENERIC-NEXT: retq # sched: [1:1.00] 11291 ; 11292 ; SKX-LABEL: test_8xi64_shuff_mask0: 11293 ; SKX: # %bb.0: 11294 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] 11295 ; SKX-NEXT: retq # sched: [7:1.00] 11296 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 11297 ret <8 x i64> %res 11298 } 11299 define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 11300 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask0: 11301 ; GENERIC: # %bb.0: 11302 ; GENERIC-NEXT: 
vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11303 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] 11304 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11305 ; GENERIC-NEXT: retq # sched: [1:1.00] 11306 ; 11307 ; SKX-LABEL: test_8xi64_masked_shuff_mask0: 11308 ; SKX: # %bb.0: 11309 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11310 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] 11311 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11312 ; SKX-NEXT: retq # sched: [7:1.00] 11313 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 11314 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11315 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11316 ret <8 x i64> %res 11317 } 11318 11319 define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 11320 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0: 11321 ; GENERIC: # %bb.0: 11322 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11323 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] 11324 ; GENERIC-NEXT: retq # sched: [1:1.00] 11325 ; 11326 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask0: 11327 ; SKX: # %bb.0: 11328 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11329 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] 11330 ; SKX-NEXT: retq # sched: [7:1.00] 11331 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> 11332 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11333 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11334 ret <8 x i64> %res 11335 } 11336 define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x 
i64> %mask) { 11337 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask1: 11338 ; GENERIC: # %bb.0: 11339 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11340 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] 11341 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11342 ; GENERIC-NEXT: retq # sched: [1:1.00] 11343 ; 11344 ; SKX-LABEL: test_8xi64_masked_shuff_mask1: 11345 ; SKX: # %bb.0: 11346 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11347 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] 11348 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11349 ; SKX-NEXT: retq # sched: [7:1.00] 11350 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13> 11351 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11352 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11353 ret <8 x i64> %res 11354 } 11355 11356 define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 11357 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1: 11358 ; GENERIC: # %bb.0: 11359 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11360 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] 11361 ; GENERIC-NEXT: retq # sched: [1:1.00] 11362 ; 11363 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask1: 11364 ; SKX: # %bb.0: 11365 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11366 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] 11367 ; SKX-NEXT: retq # sched: [7:1.00] 11368 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13> 11369 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11370 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11371 ret <8 x i64> %res 11372 
} 11373 define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 11374 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask2: 11375 ; GENERIC: # %bb.0: 11376 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11377 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] 11378 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11379 ; GENERIC-NEXT: retq # sched: [1:1.00] 11380 ; 11381 ; SKX-LABEL: test_8xi64_masked_shuff_mask2: 11382 ; SKX: # %bb.0: 11383 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11384 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] 11385 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11386 ; SKX-NEXT: retq # sched: [7:1.00] 11387 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9> 11388 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11389 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11390 ret <8 x i64> %res 11391 } 11392 11393 define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 11394 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2: 11395 ; GENERIC: # %bb.0: 11396 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11397 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] 11398 ; GENERIC-NEXT: retq # sched: [1:1.00] 11399 ; 11400 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask2: 11401 ; SKX: # %bb.0: 11402 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11403 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] 11404 ; SKX-NEXT: retq # sched: [7:1.00] 11405 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9> 11406 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 
11407 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11408 ret <8 x i64> %res 11409 } 11410 define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) { 11411 ; GENERIC-LABEL: test_8xi64_shuff_mask3: 11412 ; GENERIC: # %bb.0: 11413 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] 11414 ; GENERIC-NEXT: retq # sched: [1:1.00] 11415 ; 11416 ; SKX-LABEL: test_8xi64_shuff_mask3: 11417 ; SKX: # %bb.0: 11418 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] 11419 ; SKX-NEXT: retq # sched: [7:1.00] 11420 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11> 11421 ret <8 x i64> %res 11422 } 11423 define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { 11424 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask3: 11425 ; GENERIC: # %bb.0: 11426 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 11427 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] 11428 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] 11429 ; GENERIC-NEXT: retq # sched: [1:1.00] 11430 ; 11431 ; SKX-LABEL: test_8xi64_masked_shuff_mask3: 11432 ; SKX: # %bb.0: 11433 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 11434 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] 11435 ; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] 11436 ; SKX-NEXT: retq # sched: [7:1.00] 11437 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11> 11438 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11439 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11440 ret <8 x i64> %res 11441 } 11442 11443 define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { 
11444 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3: 11445 ; GENERIC: # %bb.0: 11446 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11447 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] 11448 ; GENERIC-NEXT: retq # sched: [1:1.00] 11449 ; 11450 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask3: 11451 ; SKX: # %bb.0: 11452 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11453 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] 11454 ; SKX-NEXT: retq # sched: [7:1.00] 11455 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11> 11456 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11457 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11458 ret <8 x i64> %res 11459 } 11460 define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) { 11461 ; GENERIC-LABEL: test_8xi64_shuff_mem_mask0: 11462 ; GENERIC: # %bb.0: 11463 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] 11464 ; GENERIC-NEXT: retq # sched: [1:1.00] 11465 ; 11466 ; SKX-LABEL: test_8xi64_shuff_mem_mask0: 11467 ; SKX: # %bb.0: 11468 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] 11469 ; SKX-NEXT: retq # sched: [7:1.00] 11470 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11471 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11> 11472 ret <8 x i64> %res 11473 } 11474 define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11475 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: 11476 ; GENERIC: # %bb.0: 11477 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11478 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] 11479 ; 
GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11480 ; GENERIC-NEXT: retq # sched: [1:1.00] 11481 ; 11482 ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask0: 11483 ; SKX: # %bb.0: 11484 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11485 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] 11486 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11487 ; SKX-NEXT: retq # sched: [7:1.00] 11488 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11489 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11> 11490 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11491 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11492 ret <8 x i64> %res 11493 } 11494 11495 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11496 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: 11497 ; GENERIC: # %bb.0: 11498 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 11499 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] 11500 ; GENERIC-NEXT: retq # sched: [1:1.00] 11501 ; 11502 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: 11503 ; SKX: # %bb.0: 11504 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11505 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] 11506 ; SKX-NEXT: retq # sched: [7:1.00] 11507 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11508 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11> 11509 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11510 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11511 ret <8 x i64> %res 11512 } 11513 11514 define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11515 ; 
GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: 11516 ; GENERIC: # %bb.0: 11517 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11518 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] 11519 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11520 ; GENERIC-NEXT: retq # sched: [1:1.00] 11521 ; 11522 ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask1: 11523 ; SKX: # %bb.0: 11524 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11525 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] 11526 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11527 ; SKX-NEXT: retq # sched: [7:1.00] 11528 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11529 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 11530 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11531 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11532 ret <8 x i64> %res 11533 } 11534 11535 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11536 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: 11537 ; GENERIC: # %bb.0: 11538 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 11539 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] 11540 ; GENERIC-NEXT: retq # sched: [1:1.00] 11541 ; 11542 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: 11543 ; SKX: # %bb.0: 11544 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11545 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] 11546 ; SKX-NEXT: retq # sched: [7:1.00] 11547 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11548 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9> 11549 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11550 %res = select <8 
x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11551 ret <8 x i64> %res 11552 } 11553 11554 define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11555 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: 11556 ; GENERIC: # %bb.0: 11557 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11558 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] 11559 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11560 ; GENERIC-NEXT: retq # sched: [1:1.00] 11561 ; 11562 ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask2: 11563 ; SKX: # %bb.0: 11564 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11565 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] 11566 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11567 ; SKX-NEXT: retq # sched: [7:1.00] 11568 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11569 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11> 11570 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11571 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11572 ret <8 x i64> %res 11573 } 11574 11575 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11576 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: 11577 ; GENERIC: # %bb.0: 11578 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 11579 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] 11580 ; GENERIC-NEXT: retq # sched: [1:1.00] 11581 ; 11582 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: 11583 ; SKX: # %bb.0: 11584 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11585 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] 11586 ; SKX-NEXT: retq # sched: [7:1.00] 11587 %vec2 = 
load <8 x i64>, <8 x i64>* %vec2p 11588 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11> 11589 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11590 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11591 ret <8 x i64> %res 11592 } 11593 11594 define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) { 11595 ; GENERIC-LABEL: test_8xi64_shuff_mem_mask3: 11596 ; GENERIC: # %bb.0: 11597 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] 11598 ; GENERIC-NEXT: retq # sched: [1:1.00] 11599 ; 11600 ; SKX-LABEL: test_8xi64_shuff_mem_mask3: 11601 ; SKX: # %bb.0: 11602 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] 11603 ; SKX-NEXT: retq # sched: [7:1.00] 11604 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11605 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11> 11606 ret <8 x i64> %res 11607 } 11608 define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { 11609 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: 11610 ; GENERIC: # %bb.0: 11611 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 11612 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] 11613 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] 11614 ; GENERIC-NEXT: retq # sched: [1:1.00] 11615 ; 11616 ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask3: 11617 ; SKX: # %bb.0: 11618 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 11619 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] 11620 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] 11621 ; SKX-NEXT: retq # sched: [7:1.00] 11622 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11623 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> 
%vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11> 11624 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11625 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 11626 ret <8 x i64> %res 11627 } 11628 11629 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { 11630 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: 11631 ; GENERIC: # %bb.0: 11632 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 11633 ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] 11634 ; GENERIC-NEXT: retq # sched: [1:1.00] 11635 ; 11636 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: 11637 ; SKX: # %bb.0: 11638 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 11639 ; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] 11640 ; SKX-NEXT: retq # sched: [7:1.00] 11641 %vec2 = load <8 x i64>, <8 x i64>* %vec2p 11642 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11> 11643 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 11644 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 11645 ret <8 x i64> %res 11646 } 11647 11648 define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) { 11649 ; GENERIC-LABEL: test_4xfloat_unpack_low_mask0: 11650 ; GENERIC: # %bb.0: 11651 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11652 ; GENERIC-NEXT: retq # sched: [1:1.00] 11653 ; 11654 ; SKX-LABEL: test_4xfloat_unpack_low_mask0: 11655 ; SKX: # %bb.0: 11656 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11657 ; SKX-NEXT: retq # sched: [7:1.00] 11658 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11659 ret <4 x float> %res 11660 } 11661 define <4 x float> 
@test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11662 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0: 11663 ; GENERIC: # %bb.0: 11664 ; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11665 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11666 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11667 ; GENERIC-NEXT: retq # sched: [1:1.00] 11668 ; 11669 ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask0: 11670 ; SKX: # %bb.0: 11671 ; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11672 ; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11673 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11674 ; SKX-NEXT: retq # sched: [7:1.00] 11675 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11676 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11677 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11678 ret <4 x float> %res 11679 } 11680 11681 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11682 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: 11683 ; GENERIC: # %bb.0: 11684 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11685 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11686 ; GENERIC-NEXT: retq # sched: [1:1.00] 11687 ; 11688 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: 11689 ; SKX: # %bb.0: 11690 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11691 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11692 ; SKX-NEXT: retq # sched: [7:1.00] 11693 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11694 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11695 
%res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11696 ret <4 x float> %res 11697 } 11698 define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11699 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1: 11700 ; GENERIC: # %bb.0: 11701 ; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11702 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11703 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11704 ; GENERIC-NEXT: retq # sched: [1:1.00] 11705 ; 11706 ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask1: 11707 ; SKX: # %bb.0: 11708 ; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11709 ; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11710 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11711 ; SKX-NEXT: retq # sched: [7:1.00] 11712 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11713 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11714 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11715 ret <4 x float> %res 11716 } 11717 11718 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11719 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: 11720 ; GENERIC: # %bb.0: 11721 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11722 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11723 ; GENERIC-NEXT: retq # sched: [1:1.00] 11724 ; 11725 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: 11726 ; SKX: # %bb.0: 11727 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11728 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11729 ; SKX-NEXT: retq # sched: [7:1.00] 11730 %shuf = shufflevector <4 x 
float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11731 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11732 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11733 ret <4 x float> %res 11734 } 11735 define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11736 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2: 11737 ; GENERIC: # %bb.0: 11738 ; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11739 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11740 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11741 ; GENERIC-NEXT: retq # sched: [1:1.00] 11742 ; 11743 ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask2: 11744 ; SKX: # %bb.0: 11745 ; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11746 ; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11747 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11748 ; SKX-NEXT: retq # sched: [7:1.00] 11749 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11750 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11751 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11752 ret <4 x float> %res 11753 } 11754 11755 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11756 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: 11757 ; GENERIC: # %bb.0: 11758 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11759 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11760 ; GENERIC-NEXT: retq # sched: [1:1.00] 11761 ; 11762 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: 11763 ; SKX: # %bb.0: 11764 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11765 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 
{%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11766 ; SKX-NEXT: retq # sched: [7:1.00] 11767 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11768 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11769 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11770 ret <4 x float> %res 11771 } 11772 define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2) { 11773 ; GENERIC-LABEL: test_4xfloat_unpack_low_mask3: 11774 ; GENERIC: # %bb.0: 11775 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11776 ; GENERIC-NEXT: retq # sched: [1:1.00] 11777 ; 11778 ; SKX-LABEL: test_4xfloat_unpack_low_mask3: 11779 ; SKX: # %bb.0: 11780 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11781 ; SKX-NEXT: retq # sched: [7:1.00] 11782 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11783 ret <4 x float> %res 11784 } 11785 define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 11786 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3: 11787 ; GENERIC: # %bb.0: 11788 ; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 11789 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11790 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 11791 ; GENERIC-NEXT: retq # sched: [1:1.00] 11792 ; 11793 ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask3: 11794 ; SKX: # %bb.0: 11795 ; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 11796 ; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11797 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 11798 ; SKX-NEXT: retq # sched: [7:1.00] 11799 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 
11800 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11801 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11802 ret <4 x float> %res 11803 } 11804 11805 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 11806 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: 11807 ; GENERIC: # %bb.0: 11808 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11809 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11810 ; GENERIC-NEXT: retq # sched: [1:1.00] 11811 ; 11812 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: 11813 ; SKX: # %bb.0: 11814 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11815 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 11816 ; SKX-NEXT: retq # sched: [7:1.00] 11817 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11818 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11819 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11820 ret <4 x float> %res 11821 } 11822 define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { 11823 ; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask0: 11824 ; GENERIC: # %bb.0: 11825 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11826 ; GENERIC-NEXT: retq # sched: [1:1.00] 11827 ; 11828 ; SKX-LABEL: test_4xfloat_unpack_low_mem_mask0: 11829 ; SKX: # %bb.0: 11830 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11831 ; SKX-NEXT: retq # sched: [7:1.00] 11832 %vec2 = load <4 x float>, <4 x float>* %vec2p 11833 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11834 ret <4 x float> %res 11835 } 11836 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x 
float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11837 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: 11838 ; GENERIC: # %bb.0: 11839 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11840 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11841 ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11842 ; GENERIC-NEXT: retq # sched: [1:1.00] 11843 ; 11844 ; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: 11845 ; SKX: # %bb.0: 11846 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11847 ; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11848 ; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11849 ; SKX-NEXT: retq # sched: [7:1.00] 11850 %vec2 = load <4 x float>, <4 x float>* %vec2p 11851 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11852 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11853 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11854 ret <4 x float> %res 11855 } 11856 11857 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 11858 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: 11859 ; GENERIC: # %bb.0: 11860 ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11861 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11862 ; GENERIC-NEXT: retq # sched: [1:1.00] 11863 ; 11864 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: 11865 ; SKX: # %bb.0: 11866 ; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 11867 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11868 ; SKX-NEXT: retq # sched: [7:1.00] 11869 %vec2 = load <4 x float>, <4 x float>* %vec2p 11870 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11871 
%cmp = icmp eq <4 x i32> %mask, zeroinitializer 11872 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11873 ret <4 x float> %res 11874 } 11875 11876 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11877 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: 11878 ; GENERIC: # %bb.0: 11879 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11880 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11881 ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11882 ; GENERIC-NEXT: retq # sched: [1:1.00] 11883 ; 11884 ; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: 11885 ; SKX: # %bb.0: 11886 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11887 ; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11888 ; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11889 ; SKX-NEXT: retq # sched: [7:1.00] 11890 %vec2 = load <4 x float>, <4 x float>* %vec2p 11891 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11892 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11893 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11894 ret <4 x float> %res 11895 } 11896 11897 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 11898 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: 11899 ; GENERIC: # %bb.0: 11900 ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11901 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11902 ; GENERIC-NEXT: retq # sched: [1:1.00] 11903 ; 11904 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: 11905 ; SKX: # %bb.0: 11906 ; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 11907 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 
{%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11908 ; SKX-NEXT: retq # sched: [7:1.00] 11909 %vec2 = load <4 x float>, <4 x float>* %vec2p 11910 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11911 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11912 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11913 ret <4 x float> %res 11914 } 11915 11916 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11917 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: 11918 ; GENERIC: # %bb.0: 11919 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11920 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11921 ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11922 ; GENERIC-NEXT: retq # sched: [1:1.00] 11923 ; 11924 ; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: 11925 ; SKX: # %bb.0: 11926 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11927 ; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11928 ; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11929 ; SKX-NEXT: retq # sched: [7:1.00] 11930 %vec2 = load <4 x float>, <4 x float>* %vec2p 11931 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11932 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11933 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11934 ret <4 x float> %res 11935 } 11936 11937 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 11938 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: 11939 ; GENERIC: # %bb.0: 11940 ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11941 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] 
sched: [7:1.00] 11942 ; GENERIC-NEXT: retq # sched: [1:1.00] 11943 ; 11944 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: 11945 ; SKX: # %bb.0: 11946 ; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 11947 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11948 ; SKX-NEXT: retq # sched: [7:1.00] 11949 %vec2 = load <4 x float>, <4 x float>* %vec2p 11950 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11951 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11952 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 11953 ret <4 x float> %res 11954 } 11955 11956 define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { 11957 ; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask3: 11958 ; GENERIC: # %bb.0: 11959 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11960 ; GENERIC-NEXT: retq # sched: [1:1.00] 11961 ; 11962 ; SKX-LABEL: test_4xfloat_unpack_low_mem_mask3: 11963 ; SKX: # %bb.0: 11964 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11965 ; SKX-NEXT: retq # sched: [7:1.00] 11966 %vec2 = load <4 x float>, <4 x float>* %vec2p 11967 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11968 ret <4 x float> %res 11969 } 11970 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { 11971 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: 11972 ; GENERIC: # %bb.0: 11973 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 11974 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11975 ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] 11976 ; GENERIC-NEXT: retq # sched: [1:1.00] 11977 ; 11978 ; SKX-LABEL: 
test_4xfloat_masked_unpack_low_mem_mask3: 11979 ; SKX: # %bb.0: 11980 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 11981 ; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11982 ; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] 11983 ; SKX-NEXT: retq # sched: [7:1.00] 11984 %vec2 = load <4 x float>, <4 x float>* %vec2p 11985 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 11986 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 11987 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 11988 ret <4 x float> %res 11989 } 11990 11991 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { 11992 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: 11993 ; GENERIC: # %bb.0: 11994 ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] 11995 ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 11996 ; GENERIC-NEXT: retq # sched: [1:1.00] 11997 ; 11998 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: 11999 ; SKX: # %bb.0: 12000 ; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] 12001 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] 12002 ; SKX-NEXT: retq # sched: [7:1.00] 12003 %vec2 = load <4 x float>, <4 x float>* %vec2p 12004 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 12005 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 12006 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 12007 ret <4 x float> %res 12008 } 12009 12010 define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2) { 12011 ; GENERIC-LABEL: test_8xfloat_unpack_low_mask0: 12012 ; GENERIC: # %bb.0: 12013 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12014 ; GENERIC-NEXT: retq # sched: [1:1.00] 12015 ; 12016 ; SKX-LABEL: test_8xfloat_unpack_low_mask0: 12017 ; SKX: # %bb.0: 12018 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12019 ; SKX-NEXT: retq # sched: [7:1.00] 12020 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12021 ret <8 x float> %res 12022 } 12023 define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12024 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0: 12025 ; GENERIC: # %bb.0: 12026 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 12027 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12028 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12029 ; GENERIC-NEXT: retq # sched: [1:1.00] 12030 ; 12031 ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask0: 12032 ; SKX: # %bb.0: 12033 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12034 ; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12035 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12036 ; SKX-NEXT: retq # sched: [7:1.00] 12037 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12038 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12039 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12040 ret <8 x float> %res 12041 } 12042 12043 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 12044 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: 12045 ; GENERIC: # %bb.0: 12046 ; GENERIC-NEXT: 
vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12047 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12048 ; GENERIC-NEXT: retq # sched: [1:1.00] 12049 ; 12050 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: 12051 ; SKX: # %bb.0: 12052 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12053 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12054 ; SKX-NEXT: retq # sched: [7:1.00] 12055 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12056 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12057 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12058 ret <8 x float> %res 12059 } 12060 define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12061 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1: 12062 ; GENERIC: # %bb.0: 12063 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 12064 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12065 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12066 ; GENERIC-NEXT: retq # sched: [1:1.00] 12067 ; 12068 ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask1: 12069 ; SKX: # %bb.0: 12070 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12071 ; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12072 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12073 ; SKX-NEXT: retq # sched: [7:1.00] 12074 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12075 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12076 %res = select <8 x i1> 
%cmp, <8 x float> %shuf, <8 x float> %vec3 12077 ret <8 x float> %res 12078 } 12079 12080 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 12081 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: 12082 ; GENERIC: # %bb.0: 12083 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12084 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12085 ; GENERIC-NEXT: retq # sched: [1:1.00] 12086 ; 12087 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: 12088 ; SKX: # %bb.0: 12089 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12090 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12091 ; SKX-NEXT: retq # sched: [7:1.00] 12092 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12093 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12094 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12095 ret <8 x float> %res 12096 } 12097 define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12098 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2: 12099 ; GENERIC: # %bb.0: 12100 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 12101 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12102 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12103 ; GENERIC-NEXT: retq # sched: [1:1.00] 12104 ; 12105 ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask2: 12106 ; SKX: # %bb.0: 12107 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12108 ; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: 
[1:1.00] 12109 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12110 ; SKX-NEXT: retq # sched: [7:1.00] 12111 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12112 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12113 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12114 ret <8 x float> %res 12115 } 12116 12117 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 12118 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: 12119 ; GENERIC: # %bb.0: 12120 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12121 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12122 ; GENERIC-NEXT: retq # sched: [1:1.00] 12123 ; 12124 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: 12125 ; SKX: # %bb.0: 12126 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12127 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12128 ; SKX-NEXT: retq # sched: [7:1.00] 12129 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12130 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12131 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12132 ret <8 x float> %res 12133 } 12134 define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2) { 12135 ; GENERIC-LABEL: test_8xfloat_unpack_low_mask3: 12136 ; GENERIC: # %bb.0: 12137 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12138 ; GENERIC-NEXT: retq # sched: [1:1.00] 12139 ; 12140 ; SKX-LABEL: test_8xfloat_unpack_low_mask3: 12141 ; SKX: # %bb.0: 12142 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12143 ; SKX-NEXT: retq # sched: [7:1.00] 12144 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12145 ret <8 x float> %res 12146 } 12147 define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { 12148 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3: 12149 ; GENERIC: # %bb.0: 12150 ; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 12151 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12152 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 12153 ; GENERIC-NEXT: retq # sched: [1:1.00] 12154 ; 12155 ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask3: 12156 ; SKX: # %bb.0: 12157 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 12158 ; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12159 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 12160 ; SKX-NEXT: retq # sched: [7:1.00] 12161 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12162 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12163 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12164 ret <8 x float> %res 12165 } 12166 12167 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { 12168 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: 12169 ; GENERIC: # %bb.0: 12170 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12171 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12172 ; GENERIC-NEXT: retq # sched: [1:1.00] 12173 ; 12174 ; SKX-LABEL: 
test_8xfloat_zero_masked_unpack_low_mask3: 12175 ; SKX: # %bb.0: 12176 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12177 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 12178 ; SKX-NEXT: retq # sched: [7:1.00] 12179 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12180 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12181 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12182 ret <8 x float> %res 12183 } 12184 define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { 12185 ; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask0: 12186 ; GENERIC: # %bb.0: 12187 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12188 ; GENERIC-NEXT: retq # sched: [1:1.00] 12189 ; 12190 ; SKX-LABEL: test_8xfloat_unpack_low_mem_mask0: 12191 ; SKX: # %bb.0: 12192 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12193 ; SKX-NEXT: retq # sched: [7:1.00] 12194 %vec2 = load <8 x float>, <8 x float>* %vec2p 12195 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12196 ret <8 x float> %res 12197 } 12198 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 12199 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: 12200 ; GENERIC: # %bb.0: 12201 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12202 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12203 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 12204 ; GENERIC-NEXT: retq # sched: [1:1.00] 12205 ; 12206 ; SKX-LABEL: 
test_8xfloat_masked_unpack_low_mem_mask0: 12207 ; SKX: # %bb.0: 12208 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12209 ; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12210 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 12211 ; SKX-NEXT: retq # sched: [7:1.00] 12212 %vec2 = load <8 x float>, <8 x float>* %vec2p 12213 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12214 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12215 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12216 ret <8 x float> %res 12217 } 12218 12219 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 12220 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: 12221 ; GENERIC: # %bb.0: 12222 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 12223 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12224 ; GENERIC-NEXT: retq # sched: [1:1.00] 12225 ; 12226 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: 12227 ; SKX: # %bb.0: 12228 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 12229 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12230 ; SKX-NEXT: retq # sched: [7:1.00] 12231 %vec2 = load <8 x float>, <8 x float>* %vec2p 12232 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12233 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12234 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12235 ret <8 x float> %res 12236 } 12237 12238 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> 
%vec3, <8 x i32> %mask) { 12239 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: 12240 ; GENERIC: # %bb.0: 12241 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12242 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12243 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 12244 ; GENERIC-NEXT: retq # sched: [1:1.00] 12245 ; 12246 ; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: 12247 ; SKX: # %bb.0: 12248 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12249 ; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12250 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 12251 ; SKX-NEXT: retq # sched: [7:1.00] 12252 %vec2 = load <8 x float>, <8 x float>* %vec2p 12253 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12254 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12255 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12256 ret <8 x float> %res 12257 } 12258 12259 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 12260 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: 12261 ; GENERIC: # %bb.0: 12262 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 12263 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12264 ; GENERIC-NEXT: retq # sched: [1:1.00] 12265 ; 12266 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: 12267 ; SKX: # %bb.0: 12268 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 12269 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12270 ; SKX-NEXT: retq # sched: [7:1.00] 12271 %vec2 = load <8 x float>, <8 x 
float>* %vec2p 12272 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12273 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12274 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12275 ret <8 x float> %res 12276 } 12277 12278 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 12279 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: 12280 ; GENERIC: # %bb.0: 12281 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12282 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12283 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 12284 ; GENERIC-NEXT: retq # sched: [1:1.00] 12285 ; 12286 ; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: 12287 ; SKX: # %bb.0: 12288 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12289 ; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12290 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 12291 ; SKX-NEXT: retq # sched: [7:1.00] 12292 %vec2 = load <8 x float>, <8 x float>* %vec2p 12293 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12294 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12295 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12296 ret <8 x float> %res 12297 } 12298 12299 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 12300 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: 12301 ; GENERIC: # %bb.0: 12302 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 12303 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = 
ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12304 ; GENERIC-NEXT: retq # sched: [1:1.00] 12305 ; 12306 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: 12307 ; SKX: # %bb.0: 12308 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 12309 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12310 ; SKX-NEXT: retq # sched: [7:1.00] 12311 %vec2 = load <8 x float>, <8 x float>* %vec2p 12312 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12313 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12314 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 12315 ret <8 x float> %res 12316 } 12317 12318 define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { 12319 ; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask3: 12320 ; GENERIC: # %bb.0: 12321 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12322 ; GENERIC-NEXT: retq # sched: [1:1.00] 12323 ; 12324 ; SKX-LABEL: test_8xfloat_unpack_low_mem_mask3: 12325 ; SKX: # %bb.0: 12326 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12327 ; SKX-NEXT: retq # sched: [7:1.00] 12328 %vec2 = load <8 x float>, <8 x float>* %vec2p 12329 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12330 ret <8 x float> %res 12331 } 12332 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { 12333 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: 12334 ; GENERIC: # %bb.0: 12335 ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 12336 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = 
ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12337 ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] 12338 ; GENERIC-NEXT: retq # sched: [1:1.00] 12339 ; 12340 ; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: 12341 ; SKX: # %bb.0: 12342 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 12343 ; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12344 ; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] 12345 ; SKX-NEXT: retq # sched: [7:1.00] 12346 %vec2 = load <8 x float>, <8 x float>* %vec2p 12347 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12348 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12349 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 12350 ret <8 x float> %res 12351 } 12352 12353 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 12354 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: 12355 ; GENERIC: # %bb.0: 12356 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 12357 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12358 ; GENERIC-NEXT: retq # sched: [1:1.00] 12359 ; 12360 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: 12361 ; SKX: # %bb.0: 12362 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 12363 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 12364 ; SKX-NEXT: retq # sched: [7:1.00] 12365 %vec2 = load <8 x float>, <8 x float>* %vec2p 12366 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 12367 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 12368 %res = select <8 x i1> %cmp, <8 
x float> %shuf, <8 x float> zeroinitializer 12369 ret <8 x float> %res 12370 } 12371 12372 define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) { 12373 ; GENERIC-LABEL: test_16xfloat_unpack_low_mask0: 12374 ; GENERIC: # %bb.0: 12375 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12376 ; GENERIC-NEXT: retq # sched: [1:1.00] 12377 ; 12378 ; SKX-LABEL: test_16xfloat_unpack_low_mask0: 12379 ; SKX: # %bb.0: 12380 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12381 ; SKX-NEXT: retq # sched: [7:1.00] 12382 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12383 ret <16 x float> %res 12384 } 12385 define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12386 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0: 12387 ; GENERIC: # %bb.0: 12388 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12389 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12390 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12391 ; GENERIC-NEXT: retq # sched: [1:1.00] 12392 ; 12393 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask0: 12394 ; SKX: # %bb.0: 12395 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12396 ; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12397 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12398 ; SKX-NEXT: retq # sched: [7:1.00] 12399 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12400 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12401 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12402 ret <16 x float> %res 12403 } 12404 12405 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 12406 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: 12407 ; GENERIC: # %bb.0: 12408 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12409 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12410 ; GENERIC-NEXT: retq # sched: [1:1.00] 12411 ; 12412 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: 12413 ; SKX: # %bb.0: 12414 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12415 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12416 ; SKX-NEXT: retq # sched: [7:1.00] 12417 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12418 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12419 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12420 ret <16 x float> %res 12421 } 12422 define <16 x float> 
@test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12423 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1: 12424 ; GENERIC: # %bb.0: 12425 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12426 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12427 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12428 ; GENERIC-NEXT: retq # sched: [1:1.00] 12429 ; 12430 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask1: 12431 ; SKX: # %bb.0: 12432 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12433 ; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12434 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12435 ; SKX-NEXT: retq # sched: [7:1.00] 12436 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12437 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12438 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12439 ret <16 x float> %res 12440 } 12441 12442 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 12443 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: 12444 ; GENERIC: # %bb.0: 12445 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12446 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12447 ; GENERIC-NEXT: retq # sched: [1:1.00] 12448 ; 12449 ; SKX-LABEL: 
test_16xfloat_zero_masked_unpack_low_mask1: 12450 ; SKX: # %bb.0: 12451 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12452 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12453 ; SKX-NEXT: retq # sched: [7:1.00] 12454 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12455 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12456 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12457 ret <16 x float> %res 12458 } 12459 define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12460 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2: 12461 ; GENERIC: # %bb.0: 12462 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12463 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12464 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12465 ; GENERIC-NEXT: retq # sched: [1:1.00] 12466 ; 12467 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask2: 12468 ; SKX: # %bb.0: 12469 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12470 ; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12471 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12472 ; SKX-NEXT: retq # sched: [7:1.00] 12473 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 
13, i32 29> 12474 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12475 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12476 ret <16 x float> %res 12477 } 12478 12479 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 12480 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: 12481 ; GENERIC: # %bb.0: 12482 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12483 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12484 ; GENERIC-NEXT: retq # sched: [1:1.00] 12485 ; 12486 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: 12487 ; SKX: # %bb.0: 12488 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12489 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12490 ; SKX-NEXT: retq # sched: [7:1.00] 12491 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12492 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12493 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12494 ret <16 x float> %res 12495 } 12496 define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) { 12497 ; GENERIC-LABEL: test_16xfloat_unpack_low_mask3: 12498 ; GENERIC: # %bb.0: 12499 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12500 ; GENERIC-NEXT: retq # sched: [1:1.00] 12501 ; 12502 ; SKX-LABEL: test_16xfloat_unpack_low_mask3: 12503 ; SKX: # 
%bb.0: 12504 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12505 ; SKX-NEXT: retq # sched: [7:1.00] 12506 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12507 ret <16 x float> %res 12508 } 12509 define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 12510 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3: 12511 ; GENERIC: # %bb.0: 12512 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 12513 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12514 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 12515 ; GENERIC-NEXT: retq # sched: [1:1.00] 12516 ; 12517 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask3: 12518 ; SKX: # %bb.0: 12519 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 12520 ; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12521 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 12522 ; SKX-NEXT: retq # sched: [7:1.00] 12523 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12524 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12525 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12526 ret <16 x float> %res 12527 } 12528 12529 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, 
<16 x float> %vec2, <16 x i32> %mask) { 12530 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: 12531 ; GENERIC: # %bb.0: 12532 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12533 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12534 ; GENERIC-NEXT: retq # sched: [1:1.00] 12535 ; 12536 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: 12537 ; SKX: # %bb.0: 12538 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12539 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] 12540 ; SKX-NEXT: retq # sched: [7:1.00] 12541 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12542 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12543 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12544 ret <16 x float> %res 12545 } 12546 define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 12547 ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0: 12548 ; GENERIC: # %bb.0: 12549 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12550 ; GENERIC-NEXT: retq # sched: [1:1.00] 12551 ; 12552 ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0: 12553 ; SKX: # %bb.0: 12554 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12555 ; SKX-NEXT: retq # sched: [7:1.00] 12556 %vec2 = load <16 x float>, 
<16 x float>* %vec2p 12557 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12558 ret <16 x float> %res 12559 } 12560 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 12561 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: 12562 ; GENERIC: # %bb.0: 12563 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12564 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12565 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12566 ; GENERIC-NEXT: retq # sched: [1:1.00] 12567 ; 12568 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: 12569 ; SKX: # %bb.0: 12570 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12571 ; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12572 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12573 ; SKX-NEXT: retq # sched: [7:1.00] 12574 %vec2 = load <16 x float>, <16 x float>* %vec2p 12575 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12576 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12577 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12578 ret <16 x float> %res 12579 } 12580 12581 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12582 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: 12583 ; GENERIC: # %bb.0: 12584 ; GENERIC-NEXT: 
vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12585 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12586 ; GENERIC-NEXT: retq # sched: [1:1.00] 12587 ; 12588 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: 12589 ; SKX: # %bb.0: 12590 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12591 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12592 ; SKX-NEXT: retq # sched: [7:1.00] 12593 %vec2 = load <16 x float>, <16 x float>* %vec2p 12594 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12595 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12596 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12597 ret <16 x float> %res 12598 } 12599 12600 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 12601 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: 12602 ; GENERIC: # %bb.0: 12603 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12604 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12605 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12606 ; GENERIC-NEXT: retq # sched: [1:1.00] 12607 ; 12608 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: 12609 ; SKX: # %bb.0: 12610 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12611 ; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12612 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12613 ; SKX-NEXT: retq # sched: [7:1.00] 12614 %vec2 = load <16 x float>, <16 x float>* %vec2p 12615 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12616 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12617 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12618 ret <16 x float> %res 12619 } 12620 12621 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12622 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: 12623 ; GENERIC: # %bb.0: 12624 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12625 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12626 ; GENERIC-NEXT: retq # sched: [1:1.00] 12627 ; 12628 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: 12629 ; SKX: # %bb.0: 12630 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12631 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12632 ; SKX-NEXT: retq # sched: [7:1.00] 12633 %vec2 = load <16 x float>, <16 x float>* %vec2p 12634 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12635 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12636 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> 
zeroinitializer 12637 ret <16 x float> %res 12638 } 12639 12640 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 12641 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: 12642 ; GENERIC: # %bb.0: 12643 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12644 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12645 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12646 ; GENERIC-NEXT: retq # sched: [1:1.00] 12647 ; 12648 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: 12649 ; SKX: # %bb.0: 12650 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12651 ; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12652 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12653 ; SKX-NEXT: retq # sched: [7:1.00] 12654 %vec2 = load <16 x float>, <16 x float>* %vec2p 12655 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12656 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12657 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12658 ret <16 x float> %res 12659 } 12660 12661 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12662 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: 12663 ; GENERIC: # %bb.0: 12664 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12665 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12666 ; GENERIC-NEXT: retq # sched: [1:1.00] 12667 ; 12668 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: 12669 ; SKX: # %bb.0: 12670 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12671 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12672 ; SKX-NEXT: retq # sched: [7:1.00] 12673 %vec2 = load <16 x float>, <16 x float>* %vec2p 12674 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12675 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12676 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12677 ret <16 x float> %res 12678 } 12679 12680 define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 12681 ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3: 12682 ; GENERIC: # %bb.0: 12683 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12684 ; GENERIC-NEXT: retq # sched: [1:1.00] 12685 ; 12686 ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3: 12687 ; SKX: # %bb.0: 12688 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12689 ; SKX-NEXT: retq # sched: [7:1.00] 12690 %vec2 = load <16 x float>, <16 x float>* %vec2p 12691 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, 
i32 29> 12692 ret <16 x float> %res 12693 } 12694 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 12695 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: 12696 ; GENERIC: # %bb.0: 12697 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 12698 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12699 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 12700 ; GENERIC-NEXT: retq # sched: [1:1.00] 12701 ; 12702 ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: 12703 ; SKX: # %bb.0: 12704 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 12705 ; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12706 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 12707 ; SKX-NEXT: retq # sched: [7:1.00] 12708 %vec2 = load <16 x float>, <16 x float>* %vec2p 12709 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12710 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12711 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 12712 ret <16 x float> %res 12713 } 12714 12715 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 12716 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: 12717 ; GENERIC: # %bb.0: 12718 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 12719 ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12720 ; GENERIC-NEXT: retq # sched: [1:1.00] 12721 ; 12722 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: 12723 ; SKX: # %bb.0: 12724 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 12725 ; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] 12726 ; SKX-NEXT: retq # sched: [7:1.00] 12727 %vec2 = load <16 x float>, <16 x float>* %vec2p 12728 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> 12729 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 12730 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 12731 ret <16 x float> %res 12732 } 12733 12734 define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) { 12735 ; GENERIC-LABEL: test_2xdouble_unpack_low_mask0: 12736 ; GENERIC: # %bb.0: 12737 ; GENERIC-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 12738 ; GENERIC-NEXT: retq # sched: [1:1.00] 12739 ; 12740 ; SKX-LABEL: test_2xdouble_unpack_low_mask0: 12741 ; SKX: # %bb.0: 12742 ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 12743 ; SKX-NEXT: retq # sched: [7:1.00] 12744 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12745 ret <2 x double> %res 12746 } 12747 define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 12748 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0: 12749 ; GENERIC: # %bb.0: 12750 ; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] 12751 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 
{%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12752 ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 12753 ; GENERIC-NEXT: retq # sched: [1:1.00] 12754 ; 12755 ; SKX-LABEL: test_2xdouble_masked_unpack_low_mask0: 12756 ; SKX: # %bb.0: 12757 ; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] 12758 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12759 ; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 12760 ; SKX-NEXT: retq # sched: [7:1.00] 12761 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12762 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12763 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12764 ret <2 x double> %res 12765 } 12766 12767 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 12768 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: 12769 ; GENERIC: # %bb.0: 12770 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12771 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12772 ; GENERIC-NEXT: retq # sched: [1:1.00] 12773 ; 12774 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: 12775 ; SKX: # %bb.0: 12776 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12777 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12778 ; SKX-NEXT: retq # sched: [7:1.00] 12779 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12780 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12781 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12782 ret <2 x double> %res 12783 } 12784 define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 12785 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1: 12786 ; GENERIC: # %bb.0: 12787 ; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 
# sched: [1:0.33] 12788 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12789 ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 12790 ; GENERIC-NEXT: retq # sched: [1:1.00] 12791 ; 12792 ; SKX-LABEL: test_2xdouble_masked_unpack_low_mask1: 12793 ; SKX: # %bb.0: 12794 ; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] 12795 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] 12796 ; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 12797 ; SKX-NEXT: retq # sched: [7:1.00] 12798 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12799 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12800 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12801 ret <2 x double> %res 12802 } 12803 12804 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 12805 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: 12806 ; GENERIC: # %bb.0: 12807 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12808 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12809 ; GENERIC-NEXT: retq # sched: [1:1.00] 12810 ; 12811 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: 12812 ; SKX: # %bb.0: 12813 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12814 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] 12815 ; SKX-NEXT: retq # sched: [7:1.00] 12816 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12817 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12818 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12819 ret <2 x double> %res 12820 } 12821 define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { 12822 ; GENERIC-LABEL: test_2xdouble_unpack_low_mem_mask0: 12823 ; GENERIC: # %bb.0: 12824 ; GENERIC-NEXT: 
vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] 12825 ; GENERIC-NEXT: retq # sched: [1:1.00] 12826 ; 12827 ; SKX-LABEL: test_2xdouble_unpack_low_mem_mask0: 12828 ; SKX: # %bb.0: 12829 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] 12830 ; SKX-NEXT: retq # sched: [7:1.00] 12831 %vec2 = load <2 x double>, <2 x double>* %vec2p 12832 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12833 ret <2 x double> %res 12834 } 12835 define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { 12836 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: 12837 ; GENERIC: # %bb.0: 12838 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12839 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12840 ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] 12841 ; GENERIC-NEXT: retq # sched: [1:1.00] 12842 ; 12843 ; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: 12844 ; SKX: # %bb.0: 12845 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12846 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12847 ; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 12848 ; SKX-NEXT: retq # sched: [7:1.00] 12849 %vec2 = load <2 x double>, <2 x double>* %vec2p 12850 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12851 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12852 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12853 ret <2 x double> %res 12854 } 12855 12856 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 12857 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: 12858 ; GENERIC: # %bb.0: 12859 ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 12860 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = 
xmm0[0],mem[0] sched: [7:1.00] 12861 ; GENERIC-NEXT: retq # sched: [1:1.00] 12862 ; 12863 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: 12864 ; SKX: # %bb.0: 12865 ; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] 12866 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] 12867 ; SKX-NEXT: retq # sched: [7:1.00] 12868 %vec2 = load <2 x double>, <2 x double>* %vec2p 12869 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12870 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12871 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12872 ret <2 x double> %res 12873 } 12874 12875 define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { 12876 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: 12877 ; GENERIC: # %bb.0: 12878 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 12879 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12880 ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] 12881 ; GENERIC-NEXT: retq # sched: [1:1.00] 12882 ; 12883 ; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: 12884 ; SKX: # %bb.0: 12885 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 12886 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] 12887 ; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 12888 ; SKX-NEXT: retq # sched: [7:1.00] 12889 %vec2 = load <2 x double>, <2 x double>* %vec2p 12890 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12891 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12892 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 12893 ret <2 x double> %res 12894 } 12895 12896 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 12897 ; 
GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: 12898 ; GENERIC: # %bb.0: 12899 ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 12900 ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] 12901 ; GENERIC-NEXT: retq # sched: [1:1.00] 12902 ; 12903 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: 12904 ; SKX: # %bb.0: 12905 ; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] 12906 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] 12907 ; SKX-NEXT: retq # sched: [7:1.00] 12908 %vec2 = load <2 x double>, <2 x double>* %vec2p 12909 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> 12910 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 12911 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 12912 ret <2 x double> %res 12913 } 12914 12915 define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2) { 12916 ; GENERIC-LABEL: test_4xdouble_unpack_low_mask0: 12917 ; GENERIC: # %bb.0: 12918 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12919 ; GENERIC-NEXT: retq # sched: [1:1.00] 12920 ; 12921 ; SKX-LABEL: test_4xdouble_unpack_low_mask0: 12922 ; SKX: # %bb.0: 12923 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12924 ; SKX-NEXT: retq # sched: [7:1.00] 12925 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12926 ret <4 x double> %res 12927 } 12928 define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 12929 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0: 12930 ; GENERIC: # %bb.0: 12931 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 12932 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12933 ; 
GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 12934 ; GENERIC-NEXT: retq # sched: [1:1.00] 12935 ; 12936 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask0: 12937 ; SKX: # %bb.0: 12938 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 12939 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12940 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 12941 ; SKX-NEXT: retq # sched: [7:1.00] 12942 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12943 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12944 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 12945 ret <4 x double> %res 12946 } 12947 12948 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 12949 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: 12950 ; GENERIC: # %bb.0: 12951 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 12952 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12953 ; GENERIC-NEXT: retq # sched: [1:1.00] 12954 ; 12955 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: 12956 ; SKX: # %bb.0: 12957 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 12958 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12959 ; SKX-NEXT: retq # sched: [7:1.00] 12960 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12961 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12962 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 12963 ret <4 x double> %res 12964 } 12965 define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 12966 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1: 12967 ; GENERIC: # %bb.0: 12968 ; GENERIC-NEXT: 
vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 12969 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12970 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 12971 ; GENERIC-NEXT: retq # sched: [1:1.00] 12972 ; 12973 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask1: 12974 ; SKX: # %bb.0: 12975 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 12976 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12977 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 12978 ; SKX-NEXT: retq # sched: [7:1.00] 12979 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12980 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12981 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 12982 ret <4 x double> %res 12983 } 12984 12985 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 12986 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: 12987 ; GENERIC: # %bb.0: 12988 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 12989 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12990 ; GENERIC-NEXT: retq # sched: [1:1.00] 12991 ; 12992 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: 12993 ; SKX: # %bb.0: 12994 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 12995 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 12996 ; SKX-NEXT: retq # sched: [7:1.00] 12997 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 12998 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 12999 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13000 ret <4 x double> %res 13001 } 13002 define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> 
%vec2, <4 x double> %vec3, <4 x i64> %mask) { 13003 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2: 13004 ; GENERIC: # %bb.0: 13005 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 13006 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13007 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 13008 ; GENERIC-NEXT: retq # sched: [1:1.00] 13009 ; 13010 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask2: 13011 ; SKX: # %bb.0: 13012 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 13013 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13014 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 13015 ; SKX-NEXT: retq # sched: [7:1.00] 13016 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13017 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13018 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13019 ret <4 x double> %res 13020 } 13021 13022 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 13023 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: 13024 ; GENERIC: # %bb.0: 13025 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13026 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13027 ; GENERIC-NEXT: retq # sched: [1:1.00] 13028 ; 13029 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: 13030 ; SKX: # %bb.0: 13031 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13032 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13033 ; SKX-NEXT: retq # sched: [7:1.00] 13034 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13035 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13036 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x 
double> zeroinitializer 13037 ret <4 x double> %res 13038 } 13039 define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2) { 13040 ; GENERIC-LABEL: test_4xdouble_unpack_low_mask3: 13041 ; GENERIC: # %bb.0: 13042 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13043 ; GENERIC-NEXT: retq # sched: [1:1.00] 13044 ; 13045 ; SKX-LABEL: test_4xdouble_unpack_low_mask3: 13046 ; SKX: # %bb.0: 13047 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13048 ; SKX-NEXT: retq # sched: [7:1.00] 13049 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13050 ret <4 x double> %res 13051 } 13052 define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 13053 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3: 13054 ; GENERIC: # %bb.0: 13055 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 13056 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13057 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 13058 ; GENERIC-NEXT: retq # sched: [1:1.00] 13059 ; 13060 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask3: 13061 ; SKX: # %bb.0: 13062 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 13063 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13064 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 13065 ; SKX-NEXT: retq # sched: [7:1.00] 13066 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13067 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13068 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13069 ret <4 x double> %res 13070 } 13071 13072 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 
13073 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: 13074 ; GENERIC: # %bb.0: 13075 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13076 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13077 ; GENERIC-NEXT: retq # sched: [1:1.00] 13078 ; 13079 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: 13080 ; SKX: # %bb.0: 13081 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13082 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 13083 ; SKX-NEXT: retq # sched: [7:1.00] 13084 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13085 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13086 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13087 ret <4 x double> %res 13088 } 13089 define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { 13090 ; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask0: 13091 ; GENERIC: # %bb.0: 13092 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13093 ; GENERIC-NEXT: retq # sched: [1:1.00] 13094 ; 13095 ; SKX-LABEL: test_4xdouble_unpack_low_mem_mask0: 13096 ; SKX: # %bb.0: 13097 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13098 ; SKX-NEXT: retq # sched: [7:1.00] 13099 %vec2 = load <4 x double>, <4 x double>* %vec2p 13100 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13101 ret <4 x double> %res 13102 } 13103 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13104 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: 13105 ; GENERIC: # %bb.0: 13106 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13107 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = 
ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13108 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13109 ; GENERIC-NEXT: retq # sched: [1:1.00] 13110 ; 13111 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: 13112 ; SKX: # %bb.0: 13113 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13114 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13115 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13116 ; SKX-NEXT: retq # sched: [7:1.00] 13117 %vec2 = load <4 x double>, <4 x double>* %vec2p 13118 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13119 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13120 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13121 ret <4 x double> %res 13122 } 13123 13124 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13125 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: 13126 ; GENERIC: # %bb.0: 13127 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13128 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13129 ; GENERIC-NEXT: retq # sched: [1:1.00] 13130 ; 13131 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: 13132 ; SKX: # %bb.0: 13133 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 13134 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13135 ; SKX-NEXT: retq # sched: [7:1.00] 13136 %vec2 = load <4 x double>, <4 x double>* %vec2p 13137 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13138 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13139 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13140 ret <4 x double> %res 13141 } 13142 13143 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x 
double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13144 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: 13145 ; GENERIC: # %bb.0: 13146 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13147 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13148 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13149 ; GENERIC-NEXT: retq # sched: [1:1.00] 13150 ; 13151 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: 13152 ; SKX: # %bb.0: 13153 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13154 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13155 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13156 ; SKX-NEXT: retq # sched: [7:1.00] 13157 %vec2 = load <4 x double>, <4 x double>* %vec2p 13158 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13159 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13160 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13161 ret <4 x double> %res 13162 } 13163 13164 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13165 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: 13166 ; GENERIC: # %bb.0: 13167 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13168 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13169 ; GENERIC-NEXT: retq # sched: [1:1.00] 13170 ; 13171 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: 13172 ; SKX: # %bb.0: 13173 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 13174 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13175 ; SKX-NEXT: retq # sched: [7:1.00] 13176 %vec2 = load <4 x double>, <4 x double>* %vec2p 13177 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 
x i32> <i32 0, i32 4, i32 2, i32 6> 13178 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13179 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13180 ret <4 x double> %res 13181 } 13182 13183 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13184 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: 13185 ; GENERIC: # %bb.0: 13186 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13187 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13188 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13189 ; GENERIC-NEXT: retq # sched: [1:1.00] 13190 ; 13191 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: 13192 ; SKX: # %bb.0: 13193 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13194 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13195 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13196 ; SKX-NEXT: retq # sched: [7:1.00] 13197 %vec2 = load <4 x double>, <4 x double>* %vec2p 13198 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13199 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13200 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13201 ret <4 x double> %res 13202 } 13203 13204 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13205 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: 13206 ; GENERIC: # %bb.0: 13207 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13208 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13209 ; GENERIC-NEXT: retq # sched: [1:1.00] 13210 ; 13211 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: 13212 ; SKX: # %bb.0: 13213 ; SKX-NEXT: vptestnmq %ymm1, 
%ymm1, %k1 # sched: [3:1.00] 13214 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13215 ; SKX-NEXT: retq # sched: [7:1.00] 13216 %vec2 = load <4 x double>, <4 x double>* %vec2p 13217 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13218 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13219 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13220 ret <4 x double> %res 13221 } 13222 13223 define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { 13224 ; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask3: 13225 ; GENERIC: # %bb.0: 13226 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13227 ; GENERIC-NEXT: retq # sched: [1:1.00] 13228 ; 13229 ; SKX-LABEL: test_4xdouble_unpack_low_mem_mask3: 13230 ; SKX: # %bb.0: 13231 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13232 ; SKX-NEXT: retq # sched: [7:1.00] 13233 %vec2 = load <4 x double>, <4 x double>* %vec2p 13234 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13235 ret <4 x double> %res 13236 } 13237 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 13238 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: 13239 ; GENERIC: # %bb.0: 13240 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 13241 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13242 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 13243 ; GENERIC-NEXT: retq # sched: [1:1.00] 13244 ; 13245 ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: 13246 ; SKX: # %bb.0: 13247 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 13248 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = 
ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13249 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 13250 ; SKX-NEXT: retq # sched: [7:1.00] 13251 %vec2 = load <4 x double>, <4 x double>* %vec2p 13252 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13253 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13254 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 13255 ret <4 x double> %res 13256 } 13257 13258 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 13259 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: 13260 ; GENERIC: # %bb.0: 13261 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 13262 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13263 ; GENERIC-NEXT: retq # sched: [1:1.00] 13264 ; 13265 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: 13266 ; SKX: # %bb.0: 13267 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 13268 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 13269 ; SKX-NEXT: retq # sched: [7:1.00] 13270 %vec2 = load <4 x double>, <4 x double>* %vec2p 13271 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 13272 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 13273 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 13274 ret <4 x double> %res 13275 } 13276 13277 define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) { 13278 ; GENERIC-LABEL: test_8xdouble_unpack_low_mask0: 13279 ; GENERIC: # %bb.0: 13280 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13281 ; GENERIC-NEXT: retq # sched: [1:1.00] 13282 ; 13283 ; SKX-LABEL: test_8xdouble_unpack_low_mask0: 13284 ; SKX: # 
%bb.0: 13285 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13286 ; SKX-NEXT: retq # sched: [7:1.00] 13287 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13288 ret <8 x double> %res 13289 } 13290 define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13291 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0: 13292 ; GENERIC: # %bb.0: 13293 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13294 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13295 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13296 ; GENERIC-NEXT: retq # sched: [1:1.00] 13297 ; 13298 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask0: 13299 ; SKX: # %bb.0: 13300 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13301 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13302 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13303 ; SKX-NEXT: retq # sched: [7:1.00] 13304 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13305 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13306 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13307 ret <8 x double> %res 13308 } 13309 13310 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13311 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: 13312 ; GENERIC: # %bb.0: 13313 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13314 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: 
[1:1.00] 13315 ; GENERIC-NEXT: retq # sched: [1:1.00] 13316 ; 13317 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: 13318 ; SKX: # %bb.0: 13319 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13320 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13321 ; SKX-NEXT: retq # sched: [7:1.00] 13322 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13323 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13324 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13325 ret <8 x double> %res 13326 } 13327 define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13328 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1: 13329 ; GENERIC: # %bb.0: 13330 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13331 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13332 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13333 ; GENERIC-NEXT: retq # sched: [1:1.00] 13334 ; 13335 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask1: 13336 ; SKX: # %bb.0: 13337 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13338 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13339 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13340 ; SKX-NEXT: retq # sched: [7:1.00] 13341 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13342 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13343 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13344 ret <8 x double> %res 13345 } 13346 13347 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 
x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13348 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: 13349 ; GENERIC: # %bb.0: 13350 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13351 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13352 ; GENERIC-NEXT: retq # sched: [1:1.00] 13353 ; 13354 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: 13355 ; SKX: # %bb.0: 13356 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13357 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13358 ; SKX-NEXT: retq # sched: [7:1.00] 13359 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13360 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13361 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13362 ret <8 x double> %res 13363 } 13364 define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13365 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2: 13366 ; GENERIC: # %bb.0: 13367 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13368 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13369 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13370 ; GENERIC-NEXT: retq # sched: [1:1.00] 13371 ; 13372 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask2: 13373 ; SKX: # %bb.0: 13374 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13375 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13376 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13377 ; SKX-NEXT: retq # sched: [7:1.00] 13378 %shuf = shufflevector <8 x 
double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13379 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13380 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13381 ret <8 x double> %res 13382 } 13383 13384 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13385 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: 13386 ; GENERIC: # %bb.0: 13387 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13388 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13389 ; GENERIC-NEXT: retq # sched: [1:1.00] 13390 ; 13391 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: 13392 ; SKX: # %bb.0: 13393 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13394 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13395 ; SKX-NEXT: retq # sched: [7:1.00] 13396 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13397 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13398 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13399 ret <8 x double> %res 13400 } 13401 define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) { 13402 ; GENERIC-LABEL: test_8xdouble_unpack_low_mask3: 13403 ; GENERIC: # %bb.0: 13404 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13405 ; GENERIC-NEXT: retq # sched: [1:1.00] 13406 ; 13407 ; SKX-LABEL: test_8xdouble_unpack_low_mask3: 13408 ; SKX: # %bb.0: 13409 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13410 ; SKX-NEXT: retq # sched: [7:1.00] 13411 
%res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13412 ret <8 x double> %res 13413 } 13414 define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 13415 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3: 13416 ; GENERIC: # %bb.0: 13417 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 13418 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13419 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 13420 ; GENERIC-NEXT: retq # sched: [1:1.00] 13421 ; 13422 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask3: 13423 ; SKX: # %bb.0: 13424 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 13425 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13426 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 13427 ; SKX-NEXT: retq # sched: [7:1.00] 13428 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13429 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13430 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13431 ret <8 x double> %res 13432 } 13433 13434 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 13435 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: 13436 ; GENERIC: # %bb.0: 13437 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13438 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13439 ; GENERIC-NEXT: retq # sched: [1:1.00] 13440 ; 13441 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: 13442 ; SKX: # %bb.0: 13443 ; SKX-NEXT: vptestnmq %zmm2, 
%zmm2, %k1 # sched: [3:1.00] 13444 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] 13445 ; SKX-NEXT: retq # sched: [7:1.00] 13446 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13447 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13448 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13449 ret <8 x double> %res 13450 } 13451 define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { 13452 ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0: 13453 ; GENERIC: # %bb.0: 13454 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13455 ; GENERIC-NEXT: retq # sched: [1:1.00] 13456 ; 13457 ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0: 13458 ; SKX: # %bb.0: 13459 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13460 ; SKX-NEXT: retq # sched: [7:1.00] 13461 %vec2 = load <8 x double>, <8 x double>* %vec2p 13462 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13463 ret <8 x double> %res 13464 } 13465 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13466 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: 13467 ; GENERIC: # %bb.0: 13468 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13469 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13470 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13471 ; GENERIC-NEXT: retq # sched: [1:1.00] 13472 ; 13473 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: 13474 ; SKX: # %bb.0: 13475 ; 
SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13476 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13477 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13478 ; SKX-NEXT: retq # sched: [7:1.00] 13479 %vec2 = load <8 x double>, <8 x double>* %vec2p 13480 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13481 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13482 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13483 ret <8 x double> %res 13484 } 13485 13486 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 13487 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: 13488 ; GENERIC: # %bb.0: 13489 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13490 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13491 ; GENERIC-NEXT: retq # sched: [1:1.00] 13492 ; 13493 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: 13494 ; SKX: # %bb.0: 13495 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13496 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13497 ; SKX-NEXT: retq # sched: [7:1.00] 13498 %vec2 = load <8 x double>, <8 x double>* %vec2p 13499 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13500 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13501 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13502 ret <8 x double> %res 13503 } 13504 13505 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13506 ; GENERIC-LABEL: 
test_8xdouble_masked_unpack_low_mem_mask1: 13507 ; GENERIC: # %bb.0: 13508 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13509 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13510 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13511 ; GENERIC-NEXT: retq # sched: [1:1.00] 13512 ; 13513 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: 13514 ; SKX: # %bb.0: 13515 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13516 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13517 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13518 ; SKX-NEXT: retq # sched: [7:1.00] 13519 %vec2 = load <8 x double>, <8 x double>* %vec2p 13520 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13521 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13522 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13523 ret <8 x double> %res 13524 } 13525 13526 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 13527 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: 13528 ; GENERIC: # %bb.0: 13529 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13530 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13531 ; GENERIC-NEXT: retq # sched: [1:1.00] 13532 ; 13533 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: 13534 ; SKX: # %bb.0: 13535 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13536 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13537 ; SKX-NEXT: retq # sched: [7:1.00] 13538 %vec2 = load <8 x double>, <8 x double>* %vec2p 13539 %shuf = 
shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13540 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13541 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13542 ret <8 x double> %res 13543 } 13544 13545 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13546 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: 13547 ; GENERIC: # %bb.0: 13548 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13549 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13550 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13551 ; GENERIC-NEXT: retq # sched: [1:1.00] 13552 ; 13553 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: 13554 ; SKX: # %bb.0: 13555 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13556 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13557 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13558 ; SKX-NEXT: retq # sched: [7:1.00] 13559 %vec2 = load <8 x double>, <8 x double>* %vec2p 13560 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13561 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13562 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13563 ret <8 x double> %res 13564 } 13565 13566 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 13567 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: 13568 ; GENERIC: # %bb.0: 13569 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13570 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13571 ; GENERIC-NEXT: retq # sched: [1:1.00] 13572 ; 13573 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: 13574 ; SKX: # %bb.0: 13575 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13576 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13577 ; SKX-NEXT: retq # sched: [7:1.00] 13578 %vec2 = load <8 x double>, <8 x double>* %vec2p 13579 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13580 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13581 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13582 ret <8 x double> %res 13583 } 13584 13585 define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { 13586 ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3: 13587 ; GENERIC: # %bb.0: 13588 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13589 ; GENERIC-NEXT: retq # sched: [1:1.00] 13590 ; 13591 ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3: 13592 ; SKX: # %bb.0: 13593 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13594 ; SKX-NEXT: retq # sched: [7:1.00] 13595 %vec2 = load <8 x double>, <8 x double>* %vec2p 13596 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13597 ret <8 x double> %res 13598 } 13599 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { 13600 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: 13601 ; GENERIC: # %bb.0: 13602 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 13603 ; GENERIC-NEXT: vunpcklpd 
{{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13604 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 13605 ; GENERIC-NEXT: retq # sched: [1:1.00] 13606 ; 13607 ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: 13608 ; SKX: # %bb.0: 13609 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 13610 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13611 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 13612 ; SKX-NEXT: retq # sched: [7:1.00] 13613 %vec2 = load <8 x double>, <8 x double>* %vec2p 13614 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13615 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 13616 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 13617 ret <8 x double> %res 13618 } 13619 13620 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { 13621 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: 13622 ; GENERIC: # %bb.0: 13623 ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 13624 ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13625 ; GENERIC-NEXT: retq # sched: [1:1.00] 13626 ; 13627 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: 13628 ; SKX: # %bb.0: 13629 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 13630 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] 13631 ; SKX-NEXT: retq # sched: [7:1.00] 13632 %vec2 = load <8 x double>, <8 x double>* %vec2p 13633 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 13634 %cmp = icmp eq <8 x i64> %mask, 
zeroinitializer 13635 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 13636 ret <8 x double> %res 13637 } 13638 13639 define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2) { 13640 ; GENERIC-LABEL: test_4xfloat_unpack_high_mask0: 13641 ; GENERIC: # %bb.0: 13642 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13643 ; GENERIC-NEXT: retq # sched: [1:1.00] 13644 ; 13645 ; SKX-LABEL: test_4xfloat_unpack_high_mask0: 13646 ; SKX: # %bb.0: 13647 ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13648 ; SKX-NEXT: retq # sched: [7:1.00] 13649 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13650 ret <4 x float> %res 13651 } 13652 define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 13653 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0: 13654 ; GENERIC: # %bb.0: 13655 ; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 13656 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13657 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 13658 ; GENERIC-NEXT: retq # sched: [1:1.00] 13659 ; 13660 ; SKX-LABEL: test_4xfloat_masked_unpack_high_mask0: 13661 ; SKX: # %bb.0: 13662 ; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 13663 ; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13664 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 13665 ; SKX-NEXT: retq # sched: [7:1.00] 13666 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13667 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13668 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13669 ret <4 x float> %res 13670 } 13671 13672 define <4 x float> 
@test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 13673 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: 13674 ; GENERIC: # %bb.0: 13675 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13676 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13677 ; GENERIC-NEXT: retq # sched: [1:1.00] 13678 ; 13679 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: 13680 ; SKX: # %bb.0: 13681 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13682 ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13683 ; SKX-NEXT: retq # sched: [7:1.00] 13684 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13685 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13686 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13687 ret <4 x float> %res 13688 } 13689 define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 13690 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1: 13691 ; GENERIC: # %bb.0: 13692 ; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 13693 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13694 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 13695 ; GENERIC-NEXT: retq # sched: [1:1.00] 13696 ; 13697 ; SKX-LABEL: test_4xfloat_masked_unpack_high_mask1: 13698 ; SKX: # %bb.0: 13699 ; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 13700 ; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13701 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 13702 ; SKX-NEXT: retq # sched: [7:1.00] 13703 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13704 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 
13705 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13706 ret <4 x float> %res 13707 } 13708 13709 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 13710 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: 13711 ; GENERIC: # %bb.0: 13712 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13713 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13714 ; GENERIC-NEXT: retq # sched: [1:1.00] 13715 ; 13716 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: 13717 ; SKX: # %bb.0: 13718 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13719 ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13720 ; SKX-NEXT: retq # sched: [7:1.00] 13721 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13722 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13723 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13724 ret <4 x float> %res 13725 } 13726 define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { 13727 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2: 13728 ; GENERIC: # %bb.0: 13729 ; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] 13730 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13731 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 13732 ; GENERIC-NEXT: retq # sched: [1:1.00] 13733 ; 13734 ; SKX-LABEL: test_4xfloat_masked_unpack_high_mask2: 13735 ; SKX: # %bb.0: 13736 ; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] 13737 ; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13738 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 13739 ; SKX-NEXT: retq # sched: [7:1.00] 13740 %shuf = 
shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13741 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13742 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 13743 ret <4 x float> %res 13744 } 13745 13746 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { 13747 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: 13748 ; GENERIC: # %bb.0: 13749 ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] 13750 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13751 ; GENERIC-NEXT: retq # sched: [1:1.00] 13752 ; 13753 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: 13754 ; SKX: # %bb.0: 13755 ; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] 13756 ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13757 ; SKX-NEXT: retq # sched: [7:1.00] 13758 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13759 %cmp = icmp eq <4 x i32> %mask, zeroinitializer 13760 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer 13761 ret <4 x float> %res 13762 } 13763 define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2) { 13764 ; GENERIC-LABEL: test_4xfloat_unpack_high_mask3: 13765 ; GENERIC: # %bb.0: 13766 ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13767 ; GENERIC-NEXT: retq # sched: [1:1.00] 13768 ; 13769 ; SKX-LABEL: test_4xfloat_unpack_high_mask3: 13770 ; SKX: # %bb.0: 13771 ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13772 ; SKX-NEXT: retq # sched: [7:1.00] 13773 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13774 ret <4 x float> %res 13775 } 13776 define <4 x float> 
@test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_masked_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> %vec3
  ret <4 x float> %out
}

; Zero-masked unpack-high of two <4 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane is zeroed.
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> zeroinitializer
  ret <4 x float> %out
}

; Unmasked unpack-high of a <4 x float> register with a vector loaded from %vec2p.
define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ret <4 x float> %hi
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> %vec3
  ret <4 x float> %out
}

; Zero-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane is zeroed.
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> zeroinitializer
  ret <4 x float> %out
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> %vec3
  ret <4 x float> %out
}

; Zero-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane is zeroed.
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> zeroinitializer
  ret <4 x float> %out
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> %vec3
  ret <4 x float> %out
}

; Zero-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane is zeroed.
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> zeroinitializer
  ret <4 x float> %out
}

; Unmasked unpack-high of a <4 x float> register with a vector loaded from %vec2p.
define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ret <4 x float> %hi
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> %vec3
  ret <4 x float> %out
}

; Zero-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane is zeroed.
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <4 x float>, <4 x float>* %vec2p
  %hi = shufflevector <4 x float> %vec1, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x float> %hi, <4 x float> zeroinitializer
  ret <4 x float> %out
}

; Unmasked unpack-high of two <8 x float> register operands.
define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2) {
; GENERIC-LABEL: test_8xfloat_unpack_high_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_unpack_high_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %hi
}

; Merge-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

; Zero-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane is zeroed.
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> zeroinitializer
  ret <8 x float> %out
}

; Merge-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

; Zero-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane is zeroed.
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> zeroinitializer
  ret <8 x float> %out
}

; Merge-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

; Zero-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane is zeroed.
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> zeroinitializer
  ret <8 x float> %out
}

; Unmasked unpack-high of two <8 x float> register operands.
define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2) {
; GENERIC-LABEL: test_8xfloat_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %hi
}

; Merge-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

; Zero-masked unpack-high of two <8 x float> registers: lanes where %mask == 0
; receive the shuffle result, every other lane is zeroed.
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %hi = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> zeroinitializer
  ret <8 x float> %out
}

; Unmasked unpack-high of an <8 x float> register with a vector loaded from %vec2p.
define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %hi
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

; Zero-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane is zeroed.
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> zeroinitializer
  ret <8 x float> %out
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

; Zero-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane is zeroed.
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> zeroinitializer
  ret <8 x float> %out
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

; Zero-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane is zeroed.
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> zeroinitializer
  ret <8 x float> %out
}

; Unmasked unpack-high of an <8 x float> register with a vector loaded from %vec2p.
define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %hi
}

; Merge-masked unpack-high with the second operand loaded from %vec2p: lanes
; where %mask == 0 receive the shuffle result, every other lane keeps %vec3.
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %rhs = load <8 x float>, <8 x float>* %vec2p
  %hi = shufflevector <8 x float> %vec1, <8 x float> %rhs, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x float> %hi, <8 x float> %vec3
  ret <8 x float> %out
}

define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x
float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { 14345 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: 14346 ; GENERIC: # %bb.0: 14347 ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 14348 ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14349 ; GENERIC-NEXT: retq # sched: [1:1.00] 14350 ; 14351 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: 14352 ; SKX: # %bb.0: 14353 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 14354 ; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 14355 ; SKX-NEXT: retq # sched: [7:1.00] 14356 %vec2 = load <8 x float>, <8 x float>* %vec2p 14357 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 14358 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 14359 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 14360 ret <8 x float> %res 14361 } 14362 14363 define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) { 14364 ; GENERIC-LABEL: test_16xfloat_unpack_high_mask0: 14365 ; GENERIC: # %bb.0: 14366 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14367 ; GENERIC-NEXT: retq # sched: [1:1.00] 14368 ; 14369 ; SKX-LABEL: test_16xfloat_unpack_high_mask0: 14370 ; SKX: # %bb.0: 14371 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14372 ; SKX-NEXT: retq # sched: [7:1.00] 14373 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, 
i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14374 ret <16 x float> %res 14375 } 14376 define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14377 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0: 14378 ; GENERIC: # %bb.0: 14379 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14380 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14381 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14382 ; GENERIC-NEXT: retq # sched: [1:1.00] 14383 ; 14384 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask0: 14385 ; SKX: # %bb.0: 14386 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14387 ; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14388 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 14389 ; SKX-NEXT: retq # sched: [7:1.00] 14390 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14391 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14392 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14393 ret <16 x float> %res 14394 } 14395 14396 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14397 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: 14398 ; GENERIC: # %bb.0: 14399 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14400 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14401 ; GENERIC-NEXT: retq # sched: [1:1.00] 14402 ; 14403 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: 14404 ; SKX: # %bb.0: 14405 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14406 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14407 ; SKX-NEXT: retq # sched: [7:1.00] 14408 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14409 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14410 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14411 ret <16 x float> %res 14412 } 14413 define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14414 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1: 14415 ; GENERIC: # %bb.0: 14416 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14417 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14418 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14419 ; GENERIC-NEXT: retq # sched: [1:1.00] 14420 ; 14421 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask1: 14422 ; SKX: # %bb.0: 14423 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14424 ; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14425 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: 
[1:0.33] 14426 ; SKX-NEXT: retq # sched: [7:1.00] 14427 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14428 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14429 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14430 ret <16 x float> %res 14431 } 14432 14433 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14434 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: 14435 ; GENERIC: # %bb.0: 14436 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14437 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14438 ; GENERIC-NEXT: retq # sched: [1:1.00] 14439 ; 14440 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: 14441 ; SKX: # %bb.0: 14442 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14443 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14444 ; SKX-NEXT: retq # sched: [7:1.00] 14445 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14446 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14447 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14448 ret <16 x float> %res 14449 } 14450 define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14451 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2: 14452 ; GENERIC: # %bb.0: 14453 
; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14454 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14455 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14456 ; GENERIC-NEXT: retq # sched: [1:1.00] 14457 ; 14458 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask2: 14459 ; SKX: # %bb.0: 14460 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14461 ; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14462 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 14463 ; SKX-NEXT: retq # sched: [7:1.00] 14464 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14465 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14466 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14467 ret <16 x float> %res 14468 } 14469 14470 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14471 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: 14472 ; GENERIC: # %bb.0: 14473 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14474 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14475 ; GENERIC-NEXT: retq # sched: [1:1.00] 14476 ; 14477 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: 14478 ; SKX: # %bb.0: 14479 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14480 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14481 ; SKX-NEXT: retq # sched: [7:1.00] 14482 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14483 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14484 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14485 ret <16 x float> %res 14486 } 14487 define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) { 14488 ; GENERIC-LABEL: test_16xfloat_unpack_high_mask3: 14489 ; GENERIC: # %bb.0: 14490 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14491 ; GENERIC-NEXT: retq # sched: [1:1.00] 14492 ; 14493 ; SKX-LABEL: test_16xfloat_unpack_high_mask3: 14494 ; SKX: # %bb.0: 14495 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14496 ; SKX-NEXT: retq # sched: [7:1.00] 14497 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14498 ret <16 x float> %res 14499 } 14500 define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { 14501 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3: 14502 ; GENERIC: # %bb.0: 14503 ; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 14504 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = 
zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14505 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 14506 ; GENERIC-NEXT: retq # sched: [1:1.00] 14507 ; 14508 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask3: 14509 ; SKX: # %bb.0: 14510 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 14511 ; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14512 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 14513 ; SKX-NEXT: retq # sched: [7:1.00] 14514 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14515 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14516 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14517 ret <16 x float> %res 14518 } 14519 14520 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { 14521 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: 14522 ; GENERIC: # %bb.0: 14523 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14524 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 14525 ; GENERIC-NEXT: retq # sched: [1:1.00] 14526 ; 14527 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: 14528 ; SKX: # %bb.0: 14529 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14530 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] 
14531 ; SKX-NEXT: retq # sched: [7:1.00] 14532 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14533 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14534 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14535 ret <16 x float> %res 14536 } 14537 define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { 14538 ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0: 14539 ; GENERIC: # %bb.0: 14540 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14541 ; GENERIC-NEXT: retq # sched: [1:1.00] 14542 ; 14543 ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0: 14544 ; SKX: # %bb.0: 14545 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14546 ; SKX-NEXT: retq # sched: [7:1.00] 14547 %vec2 = load <16 x float>, <16 x float>* %vec2p 14548 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14549 ret <16 x float> %res 14550 } 14551 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14552 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: 14553 ; GENERIC: # %bb.0: 14554 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14555 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14556 ; GENERIC-NEXT: vmovaps 
%zmm1, %zmm0 # sched: [1:1.00] 14557 ; GENERIC-NEXT: retq # sched: [1:1.00] 14558 ; 14559 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: 14560 ; SKX: # %bb.0: 14561 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14562 ; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14563 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14564 ; SKX-NEXT: retq # sched: [7:1.00] 14565 %vec2 = load <16 x float>, <16 x float>* %vec2p 14566 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14567 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14568 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14569 ret <16 x float> %res 14570 } 14571 14572 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14573 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: 14574 ; GENERIC: # %bb.0: 14575 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14576 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14577 ; GENERIC-NEXT: retq # sched: [1:1.00] 14578 ; 14579 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: 14580 ; SKX: # %bb.0: 14581 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14582 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14583 ; SKX-NEXT: retq # sched: [7:1.00] 14584 %vec2 = load <16 x float>, <16 x float>* %vec2p 14585 %shuf = shufflevector <16 x 
float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14586 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14587 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14588 ret <16 x float> %res 14589 } 14590 14591 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14592 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: 14593 ; GENERIC: # %bb.0: 14594 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14595 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14596 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 14597 ; GENERIC-NEXT: retq # sched: [1:1.00] 14598 ; 14599 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: 14600 ; SKX: # %bb.0: 14601 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14602 ; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14603 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14604 ; SKX-NEXT: retq # sched: [7:1.00] 14605 %vec2 = load <16 x float>, <16 x float>* %vec2p 14606 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14607 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14608 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14609 ret <16 x float> %res 14610 } 14611 14612 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14613 ; 
GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: 14614 ; GENERIC: # %bb.0: 14615 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14616 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14617 ; GENERIC-NEXT: retq # sched: [1:1.00] 14618 ; 14619 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: 14620 ; SKX: # %bb.0: 14621 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14622 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14623 ; SKX-NEXT: retq # sched: [7:1.00] 14624 %vec2 = load <16 x float>, <16 x float>* %vec2p 14625 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14626 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14627 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14628 ret <16 x float> %res 14629 } 14630 14631 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14632 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: 14633 ; GENERIC: # %bb.0: 14634 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14635 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14636 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 14637 ; GENERIC-NEXT: retq # sched: [1:1.00] 14638 ; 14639 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: 14640 ; SKX: # %bb.0: 14641 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, 
%k1 # sched: [3:1.00] 14642 ; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14643 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14644 ; SKX-NEXT: retq # sched: [7:1.00] 14645 %vec2 = load <16 x float>, <16 x float>* %vec2p 14646 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14647 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14648 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14649 ret <16 x float> %res 14650 } 14651 14652 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14653 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: 14654 ; GENERIC: # %bb.0: 14655 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14656 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14657 ; GENERIC-NEXT: retq # sched: [1:1.00] 14658 ; 14659 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: 14660 ; SKX: # %bb.0: 14661 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14662 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14663 ; SKX-NEXT: retq # sched: [7:1.00] 14664 %vec2 = load <16 x float>, <16 x float>* %vec2p 14665 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14666 %cmp = icmp eq <16 x i32> %mask, 
zeroinitializer 14667 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 14668 ret <16 x float> %res 14669 } 14670 14671 define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { 14672 ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3: 14673 ; GENERIC: # %bb.0: 14674 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14675 ; GENERIC-NEXT: retq # sched: [1:1.00] 14676 ; 14677 ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3: 14678 ; SKX: # %bb.0: 14679 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14680 ; SKX-NEXT: retq # sched: [7:1.00] 14681 %vec2 = load <16 x float>, <16 x float>* %vec2p 14682 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14683 ret <16 x float> %res 14684 } 14685 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { 14686 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: 14687 ; GENERIC: # %bb.0: 14688 ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 14689 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14690 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 14691 ; GENERIC-NEXT: retq # sched: [1:1.00] 14692 ; 14693 ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: 14694 ; SKX: # %bb.0: 14695 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 14696 ; SKX-NEXT: vunpckhps {{.*#+}} zmm1 
{%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14697 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 14698 ; SKX-NEXT: retq # sched: [7:1.00] 14699 %vec2 = load <16 x float>, <16 x float>* %vec2p 14700 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14701 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14702 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 14703 ret <16 x float> %res 14704 } 14705 14706 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { 14707 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: 14708 ; GENERIC: # %bb.0: 14709 ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 14710 ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14711 ; GENERIC-NEXT: retq # sched: [1:1.00] 14712 ; 14713 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: 14714 ; SKX: # %bb.0: 14715 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 14716 ; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] 14717 ; SKX-NEXT: retq # sched: [7:1.00] 14718 %vec2 = load <16 x float>, <16 x float>* %vec2p 14719 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> 14720 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 14721 %res = select <16 x i1> %cmp, <16 x float> %shuf, 
<16 x float> zeroinitializer 14722 ret <16 x float> %res 14723 } 14724 14725 define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2) { 14726 ; GENERIC-LABEL: test_2xdouble_unpack_high_mask0: 14727 ; GENERIC: # %bb.0: 14728 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14729 ; GENERIC-NEXT: retq # sched: [1:1.00] 14730 ; 14731 ; SKX-LABEL: test_2xdouble_unpack_high_mask0: 14732 ; SKX: # %bb.0: 14733 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14734 ; SKX-NEXT: retq # sched: [7:1.00] 14735 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14736 ret <2 x double> %res 14737 } 14738 define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 14739 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0: 14740 ; GENERIC: # %bb.0: 14741 ; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] 14742 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14743 ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 14744 ; GENERIC-NEXT: retq # sched: [1:1.00] 14745 ; 14746 ; SKX-LABEL: test_2xdouble_masked_unpack_high_mask0: 14747 ; SKX: # %bb.0: 14748 ; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] 14749 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14750 ; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 14751 ; SKX-NEXT: retq # sched: [7:1.00] 14752 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14753 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14754 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14755 ret <2 x double> %res 14756 } 14757 14758 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 14759 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: 14760 
; GENERIC: # %bb.0: 14761 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14762 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14763 ; GENERIC-NEXT: retq # sched: [1:1.00] 14764 ; 14765 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: 14766 ; SKX: # %bb.0: 14767 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14768 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14769 ; SKX-NEXT: retq # sched: [7:1.00] 14770 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14771 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14772 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 14773 ret <2 x double> %res 14774 } 14775 define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { 14776 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1: 14777 ; GENERIC: # %bb.0: 14778 ; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] 14779 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14780 ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 14781 ; GENERIC-NEXT: retq # sched: [1:1.00] 14782 ; 14783 ; SKX-LABEL: test_2xdouble_masked_unpack_high_mask1: 14784 ; SKX: # %bb.0: 14785 ; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] 14786 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] 14787 ; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 14788 ; SKX-NEXT: retq # sched: [7:1.00] 14789 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14790 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14791 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14792 ret <2 x double> %res 14793 } 14794 14795 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { 
14796 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: 14797 ; GENERIC: # %bb.0: 14798 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14799 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14800 ; GENERIC-NEXT: retq # sched: [1:1.00] 14801 ; 14802 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: 14803 ; SKX: # %bb.0: 14804 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14805 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] 14806 ; SKX-NEXT: retq # sched: [7:1.00] 14807 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14808 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14809 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 14810 ret <2 x double> %res 14811 } 14812 define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { 14813 ; GENERIC-LABEL: test_2xdouble_unpack_high_mem_mask0: 14814 ; GENERIC: # %bb.0: 14815 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00] 14816 ; GENERIC-NEXT: retq # sched: [1:1.00] 14817 ; 14818 ; SKX-LABEL: test_2xdouble_unpack_high_mem_mask0: 14819 ; SKX: # %bb.0: 14820 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00] 14821 ; SKX-NEXT: retq # sched: [7:1.00] 14822 %vec2 = load <2 x double>, <2 x double>* %vec2p 14823 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14824 ret <2 x double> %res 14825 } 14826 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { 14827 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: 14828 ; GENERIC: # %bb.0: 14829 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14830 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14831 ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: 
[1:1.00] 14832 ; GENERIC-NEXT: retq # sched: [1:1.00] 14833 ; 14834 ; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: 14835 ; SKX: # %bb.0: 14836 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14837 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14838 ; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 14839 ; SKX-NEXT: retq # sched: [7:1.00] 14840 %vec2 = load <2 x double>, <2 x double>* %vec2p 14841 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14842 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14843 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14844 ret <2 x double> %res 14845 } 14846 14847 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 14848 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: 14849 ; GENERIC: # %bb.0: 14850 ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 14851 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14852 ; GENERIC-NEXT: retq # sched: [1:1.00] 14853 ; 14854 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: 14855 ; SKX: # %bb.0: 14856 ; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] 14857 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14858 ; SKX-NEXT: retq # sched: [7:1.00] 14859 %vec2 = load <2 x double>, <2 x double>* %vec2p 14860 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14861 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14862 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 14863 ret <2 x double> %res 14864 } 14865 14866 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { 14867 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: 14868 ; GENERIC: # 
%bb.0: 14869 ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] 14870 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14871 ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] 14872 ; GENERIC-NEXT: retq # sched: [1:1.00] 14873 ; 14874 ; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: 14875 ; SKX: # %bb.0: 14876 ; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] 14877 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] 14878 ; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 14879 ; SKX-NEXT: retq # sched: [7:1.00] 14880 %vec2 = load <2 x double>, <2 x double>* %vec2p 14881 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14882 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14883 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 14884 ret <2 x double> %res 14885 } 14886 14887 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { 14888 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: 14889 ; GENERIC: # %bb.0: 14890 ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] 14891 ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14892 ; GENERIC-NEXT: retq # sched: [1:1.00] 14893 ; 14894 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: 14895 ; SKX: # %bb.0: 14896 ; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] 14897 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] 14898 ; SKX-NEXT: retq # sched: [7:1.00] 14899 %vec2 = load <2 x double>, <2 x double>* %vec2p 14900 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> 14901 %cmp = icmp eq <2 x i64> %mask, zeroinitializer 14902 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer 14903 ret <2 x double> %res 14904 } 14905 14906 define <4 x double> 
@test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2) { 14907 ; GENERIC-LABEL: test_4xdouble_unpack_high_mask0: 14908 ; GENERIC: # %bb.0: 14909 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14910 ; GENERIC-NEXT: retq # sched: [1:1.00] 14911 ; 14912 ; SKX-LABEL: test_4xdouble_unpack_high_mask0: 14913 ; SKX: # %bb.0: 14914 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14915 ; SKX-NEXT: retq # sched: [7:1.00] 14916 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14917 ret <4 x double> %res 14918 } 14919 define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 14920 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0: 14921 ; GENERIC: # %bb.0: 14922 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 14923 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14924 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 14925 ; GENERIC-NEXT: retq # sched: [1:1.00] 14926 ; 14927 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask0: 14928 ; SKX: # %bb.0: 14929 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 14930 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14931 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 14932 ; SKX-NEXT: retq # sched: [7:1.00] 14933 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14934 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14935 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 14936 ret <4 x double> %res 14937 } 14938 14939 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 14940 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: 14941 ; 
GENERIC: # %bb.0: 14942 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 14943 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14944 ; GENERIC-NEXT: retq # sched: [1:1.00] 14945 ; 14946 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: 14947 ; SKX: # %bb.0: 14948 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 14949 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14950 ; SKX-NEXT: retq # sched: [7:1.00] 14951 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14952 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14953 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 14954 ret <4 x double> %res 14955 } 14956 define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 14957 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1: 14958 ; GENERIC: # %bb.0: 14959 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 14960 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14961 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 14962 ; GENERIC-NEXT: retq # sched: [1:1.00] 14963 ; 14964 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask1: 14965 ; SKX: # %bb.0: 14966 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 14967 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14968 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 14969 ; SKX-NEXT: retq # sched: [7:1.00] 14970 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14971 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14972 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 14973 ret <4 x double> %res 14974 } 14975 14976 define <4 x double> 
@test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 14977 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: 14978 ; GENERIC: # %bb.0: 14979 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 14980 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14981 ; GENERIC-NEXT: retq # sched: [1:1.00] 14982 ; 14983 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: 14984 ; SKX: # %bb.0: 14985 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 14986 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14987 ; SKX-NEXT: retq # sched: [7:1.00] 14988 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 14989 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 14990 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 14991 ret <4 x double> %res 14992 } 14993 define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 14994 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2: 14995 ; GENERIC: # %bb.0: 14996 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 14997 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 14998 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 14999 ; GENERIC-NEXT: retq # sched: [1:1.00] 15000 ; 15001 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask2: 15002 ; SKX: # %bb.0: 15003 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 15004 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15005 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 15006 ; SKX-NEXT: retq # sched: [7:1.00] 15007 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15008 %cmp = icmp eq <4 x i64> 
%mask, zeroinitializer 15009 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15010 ret <4 x double> %res 15011 } 15012 15013 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 15014 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: 15015 ; GENERIC: # %bb.0: 15016 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15017 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15018 ; GENERIC-NEXT: retq # sched: [1:1.00] 15019 ; 15020 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: 15021 ; SKX: # %bb.0: 15022 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15023 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15024 ; SKX-NEXT: retq # sched: [7:1.00] 15025 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15026 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15027 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15028 ret <4 x double> %res 15029 } 15030 define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2) { 15031 ; GENERIC-LABEL: test_4xdouble_unpack_high_mask3: 15032 ; GENERIC: # %bb.0: 15033 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15034 ; GENERIC-NEXT: retq # sched: [1:1.00] 15035 ; 15036 ; SKX-LABEL: test_4xdouble_unpack_high_mask3: 15037 ; SKX: # %bb.0: 15038 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15039 ; SKX-NEXT: retq # sched: [7:1.00] 15040 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15041 ret <4 x double> %res 15042 } 15043 define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { 15044 ; 
GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask3: 15045 ; GENERIC: # %bb.0: 15046 ; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] 15047 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15048 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 15049 ; GENERIC-NEXT: retq # sched: [1:1.00] 15050 ; 15051 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask3: 15052 ; SKX: # %bb.0: 15053 ; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] 15054 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15055 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 15056 ; SKX-NEXT: retq # sched: [7:1.00] 15057 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15058 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15059 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15060 ret <4 x double> %res 15061 } 15062 15063 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { 15064 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: 15065 ; GENERIC: # %bb.0: 15066 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15067 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15068 ; GENERIC-NEXT: retq # sched: [1:1.00] 15069 ; 15070 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: 15071 ; SKX: # %bb.0: 15072 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15073 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 15074 ; SKX-NEXT: retq # sched: [7:1.00] 15075 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15076 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15077 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15078 ret <4 x double> %res 
15079 } 15080 define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { 15081 ; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask0: 15082 ; GENERIC: # %bb.0: 15083 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15084 ; GENERIC-NEXT: retq # sched: [1:1.00] 15085 ; 15086 ; SKX-LABEL: test_4xdouble_unpack_high_mem_mask0: 15087 ; SKX: # %bb.0: 15088 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15089 ; SKX-NEXT: retq # sched: [7:1.00] 15090 %vec2 = load <4 x double>, <4 x double>* %vec2p 15091 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15092 ret <4 x double> %res 15093 } 15094 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15095 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: 15096 ; GENERIC: # %bb.0: 15097 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15098 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15099 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15100 ; GENERIC-NEXT: retq # sched: [1:1.00] 15101 ; 15102 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: 15103 ; SKX: # %bb.0: 15104 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15105 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15106 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15107 ; SKX-NEXT: retq # sched: [7:1.00] 15108 %vec2 = load <4 x double>, <4 x double>* %vec2p 15109 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15110 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15111 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15112 ret <4 x double> %res 15113 } 15114 15115 define <4 x double> 
@test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 15116 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: 15117 ; GENERIC: # %bb.0: 15118 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15119 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15120 ; GENERIC-NEXT: retq # sched: [1:1.00] 15121 ; 15122 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: 15123 ; SKX: # %bb.0: 15124 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15125 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15126 ; SKX-NEXT: retq # sched: [7:1.00] 15127 %vec2 = load <4 x double>, <4 x double>* %vec2p 15128 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15129 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15130 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15131 ret <4 x double> %res 15132 } 15133 15134 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15135 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: 15136 ; GENERIC: # %bb.0: 15137 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15138 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15139 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15140 ; GENERIC-NEXT: retq # sched: [1:1.00] 15141 ; 15142 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: 15143 ; SKX: # %bb.0: 15144 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15145 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15146 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15147 ; SKX-NEXT: retq # sched: [7:1.00] 15148 %vec2 = load <4 x double>, <4 x double>* %vec2p 15149 
%shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15150 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15151 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15152 ret <4 x double> %res 15153 } 15154 15155 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 15156 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: 15157 ; GENERIC: # %bb.0: 15158 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15159 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15160 ; GENERIC-NEXT: retq # sched: [1:1.00] 15161 ; 15162 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: 15163 ; SKX: # %bb.0: 15164 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15165 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15166 ; SKX-NEXT: retq # sched: [7:1.00] 15167 %vec2 = load <4 x double>, <4 x double>* %vec2p 15168 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15169 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15170 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15171 ret <4 x double> %res 15172 } 15173 15174 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15175 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: 15176 ; GENERIC: # %bb.0: 15177 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15178 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15179 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15180 ; GENERIC-NEXT: retq # sched: [1:1.00] 15181 ; 15182 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: 15183 ; SKX: # %bb.0: 15184 ; SKX-NEXT: 
vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15185 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15186 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15187 ; SKX-NEXT: retq # sched: [7:1.00] 15188 %vec2 = load <4 x double>, <4 x double>* %vec2p 15189 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15190 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15191 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15192 ret <4 x double> %res 15193 } 15194 15195 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 15196 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: 15197 ; GENERIC: # %bb.0: 15198 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15199 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15200 ; GENERIC-NEXT: retq # sched: [1:1.00] 15201 ; 15202 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: 15203 ; SKX: # %bb.0: 15204 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15205 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15206 ; SKX-NEXT: retq # sched: [7:1.00] 15207 %vec2 = load <4 x double>, <4 x double>* %vec2p 15208 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15209 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15210 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15211 ret <4 x double> %res 15212 } 15213 15214 define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { 15215 ; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask3: 15216 ; GENERIC: # %bb.0: 15217 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15218 ; GENERIC-NEXT: retq # sched: [1:1.00] 
15219 ; 15220 ; SKX-LABEL: test_4xdouble_unpack_high_mem_mask3: 15221 ; SKX: # %bb.0: 15222 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15223 ; SKX-NEXT: retq # sched: [7:1.00] 15224 %vec2 = load <4 x double>, <4 x double>* %vec2p 15225 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15226 ret <4 x double> %res 15227 } 15228 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { 15229 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: 15230 ; GENERIC: # %bb.0: 15231 ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] 15232 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15233 ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] 15234 ; GENERIC-NEXT: retq # sched: [1:1.00] 15235 ; 15236 ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: 15237 ; SKX: # %bb.0: 15238 ; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] 15239 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15240 ; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] 15241 ; SKX-NEXT: retq # sched: [7:1.00] 15242 %vec2 = load <4 x double>, <4 x double>* %vec2p 15243 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15244 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15245 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 15246 ret <4 x double> %res 15247 } 15248 15249 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { 15250 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: 15251 ; GENERIC: # %bb.0: 15252 ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] 15253 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = 
ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15254 ; GENERIC-NEXT: retq # sched: [1:1.00] 15255 ; 15256 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: 15257 ; SKX: # %bb.0: 15258 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] 15259 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 15260 ; SKX-NEXT: retq # sched: [7:1.00] 15261 %vec2 = load <4 x double>, <4 x double>* %vec2p 15262 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 15263 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 15264 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer 15265 ret <4 x double> %res 15266 } 15267 15268 define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) { 15269 ; GENERIC-LABEL: test_8xdouble_unpack_high_mask0: 15270 ; GENERIC: # %bb.0: 15271 ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15272 ; GENERIC-NEXT: retq # sched: [1:1.00] 15273 ; 15274 ; SKX-LABEL: test_8xdouble_unpack_high_mask0: 15275 ; SKX: # %bb.0: 15276 ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15277 ; SKX-NEXT: retq # sched: [7:1.00] 15278 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15279 ret <8 x double> %res 15280 } 15281 define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { 15282 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0: 15283 ; GENERIC: # %bb.0: 15284 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 15285 ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15286 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: 
[1:1.00] 15287 ; GENERIC-NEXT: retq # sched: [1:1.00] 15288 ; 15289 ; SKX-LABEL: test_8xdouble_masked_unpack_high_mask0: 15290 ; SKX: # %bb.0: 15291 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 15292 ; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15293 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 15294 ; SKX-NEXT: retq # sched: [7:1.00] 15295 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15296 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15297 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 15298 ret <8 x double> %res 15299 } 15300 15301 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 15302 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: 15303 ; GENERIC: # %bb.0: 15304 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 15305 ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15306 ; GENERIC-NEXT: retq # sched: [1:1.00] 15307 ; 15308 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: 15309 ; SKX: # %bb.0: 15310 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 15311 ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15312 ; SKX-NEXT: retq # sched: [7:1.00] 15313 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15314 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15315 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer 15316 ret <8 x double> %res 15317 } 15318 define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> 
%mask) { 15319 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1: 15320 ; GENERIC: # %bb.0: 15321 ; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 15322 ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15323 ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] 15324 ; GENERIC-NEXT: retq # sched: [1:1.00] 15325 ; 15326 ; SKX-LABEL: test_8xdouble_masked_unpack_high_mask1: 15327 ; SKX: # %bb.0: 15328 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 15329 ; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15330 ; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] 15331 ; SKX-NEXT: retq # sched: [7:1.00] 15332 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 15333 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 15334 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 15335 ret <8 x double> %res 15336 } 15337 15338 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { 15339 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: 15340 ; GENERIC: # %bb.0: 15341 ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 15342 ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15343 ; GENERIC-NEXT: retq # sched: [1:1.00] 15344 ; 15345 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: 15346 ; SKX: # %bb.0: 15347 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 15348 ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] 15349 ; SKX-NEXT: retq # sched: [7:1.00] 15350 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 
3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
; Masked register form: builds the unpack-high shuffle of %vec1 and %vec2
; (indices 1,9,3,11,5,13,7,15) and selects it in lanes where %mask is zero;
; the remaining lanes come from %vec3. The assertions expect vptestnmq to
; produce %k1, a {%k1}-masked vunpckhpd, and a vmovapd of the result to zmm0.
define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked register form: the unpack-high shuffle of %vec1 and %vec2
; (indices 1,9,3,11,5,13,7,15) is kept in lanes where %mask is zero; all other
; lanes are zeroinitializer. The assertions expect vptestnmq followed by
; vunpckhpd with {%k1} {z}.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
; Unmasked register form: returns the unpack-high shuffle of %vec1 and %vec2
; (indices 1,9,3,11,5,13,7,15). The assertions expect a single vunpckhpd on
; zmm registers.
define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}
; Masked register form: the unpack-high shuffle of %vec1 and %vec2
; (indices 1,9,3,11,5,13,7,15) is selected in lanes where %mask is zero; the
; remaining lanes come from %vec3. The assertions expect vptestnmq, a
; {%k1}-masked vunpckhpd, and a vmovapd of the result to zmm0.
define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked register form: the unpack-high shuffle of %vec1 and %vec2
; (indices 1,9,3,11,5,13,7,15) is kept in lanes where %mask is zero; all other
; lanes are zeroinitializer. The assertions expect vptestnmq followed by
; vunpckhpd with {%k1} {z}.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
; Unmasked memory form: the second shuffle operand is loaded from %vec2p, then
; combined with %vec1 using indices 1,9,3,11,5,13,7,15. The assertions expect
; a single vunpckhpd whose second source is a mem operand.
define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}
; Masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle of
; %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is selected in lanes where %mask
; is zero, and the remaining lanes come from %vec3. The assertions expect
; vptestnmq, a {%k1}-masked vunpckhpd with a mem operand, and a vmovapd of the
; result to zmm0.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle
; of %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is kept in lanes where %mask
; is zero, and all other lanes are zeroinitializer. The assertions expect
; vptestnmq followed by vunpckhpd with {%k1} {z} and a mem operand.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle of
; %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is selected in lanes where %mask
; is zero, and the remaining lanes come from %vec3. The assertions expect
; vptestnmq, a {%k1}-masked vunpckhpd with a mem operand, and a vmovapd of the
; result to zmm0.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle
; of %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is kept in lanes where %mask
; is zero, and all other lanes are zeroinitializer. The assertions expect
; vptestnmq followed by vunpckhpd with {%k1} {z} and a mem operand.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle of
; %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is selected in lanes where %mask
; is zero, and the remaining lanes come from %vec3. The assertions expect
; vptestnmq, a {%k1}-masked vunpckhpd with a mem operand, and a vmovapd of the
; result to zmm0.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle
; of %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is kept in lanes where %mask
; is zero, and all other lanes are zeroinitializer. The assertions expect
; vptestnmq followed by vunpckhpd with {%k1} {z} and a mem operand.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; Unmasked memory form: the second shuffle operand is loaded from %vec2p, then
; combined with %vec1 using indices 1,9,3,11,5,13,7,15. The assertions expect
; a single vunpckhpd whose second source is a mem operand.
define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}
; Masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle of
; %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is selected in lanes where %mask
; is zero, and the remaining lanes come from %vec3. The assertions expect
; vptestnmq, a {%k1}-masked vunpckhpd with a mem operand, and a vmovapd of the
; result to zmm0.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
  ret <8 x double> %res
}

; Zero-masked memory form: %vec2 is loaded from %vec2p; the unpack-high shuffle
; of %vec1 and %vec2 (indices 1,9,3,11,5,13,7,15) is kept in lanes where %mask
; is zero, and all other lanes are zeroinitializer. The assertions expect
; vptestnmq followed by vunpckhpd with {%k1} {z} and a mem operand.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %vec2 = load <8 x double>, <8 x double>* %vec2p
  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
