; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s

define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
  ret <8 x i32>%res
}

define <8 x i32> @test_256_2(i8 * %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
  ret <8 x i32>%res
}

define void @test_256_3(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
  ret void
}

define void @test_256_4(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
  ret void
}

define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32>%data, <8 x i32>* %vaddr, align 32
  ret void
}

define <4 x i64> @test_256_6(i8 * %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
  ret <4 x i64>%res
}

define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64>%data, <4 x i64>* %vaddr, align 1
  ret void
}

define <4 x i64> @test_256_8(i8 * %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
  ret <4 x i64>%res
}

define void @test_256_9(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double>%data, <4 x double>* %vaddr, align 32
  ret void
}

define <4 x double> @test_256_10(i8 * %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 32
  ret <4 x double>%res
}

define void @test_256_11(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float>%data, <8 x float>* %vaddr, align 32
  ret void
}

define <8 x float> @test_256_12(i8 * %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 32
  ret <8 x float>%res
}

define void @test_256_13(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double>%data, <4 x double>* %vaddr, align 1
  ret void
}

define <4 x double> @test_256_14(i8 * %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 1
  ret <4 x double>%res
}

define void @test_256_15(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float>%data, <8 x float>* %vaddr, align 1
  ret void
}

define <8 x float> @test_256_16(i8 * %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 1
  ret <8 x float>%res
}

define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}

define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}

define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}

define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}

define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}

define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}

define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
  ret <4 x i32>%res
}

define <4 x i32> @test_128_2(i8 * %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
  ret <4 x i32>%res
}

define void @test_128_3(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64>%data, <2 x i64>* %vaddr, align 16
  ret void
}

define void @test_128_4(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32>%data, <4 x i32>* %vaddr, align 1
  ret void
}

define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32>%data, <4 x i32>* %vaddr, align 16
  ret void
}

define <2 x i64> @test_128_6(i8 * %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
  ret <2 x i64>%res
}

define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64>%data, <2 x i64>* %vaddr, align 1
  ret void
}

define <2 x i64> @test_128_8(i8 * %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
  ret <2 x i64>%res
}

define void @test_128_9(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double>%data, <2 x double>* %vaddr, align 16
  ret void
}

define <2 x double> @test_128_10(i8 * %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 16
  ret <2 x double>%res
}

define void @test_128_11(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float>%data, <4 x float>* %vaddr, align 16
  ret void
}

define <4 x float> @test_128_12(i8 * %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 16
  ret <4 x float>%res
}

define void @test_128_13(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double>%data, <2 x double>* %vaddr, align 1
  ret void
}

define <2 x double> @test_128_14(i8 * %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 1
  ret <2 x double>%res
}

define void @test_128_15(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float>%data, <4 x float>* %vaddr, align 1
  ret void
}

define <4 x float> @test_128_16(i8 * %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 1
  ret <4 x float>%res
}

define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}

define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}

define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}

define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}

define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}

define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}