; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s

; Unmasked unaligned 64 x i8 load lowers to a single vmovdqu8.
define <64 x i8> @test1(i8 * %addr) {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
  ret <64 x i8>%res
}

; Unmasked unaligned 64 x i8 store lowers to a single vmovdqu8.
define void @test2(i8 * %addr, <64 x i8> %data) {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu8 %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  store <64 x i8>%data, <64 x i8>* %vaddr, align 1
  ret void
}

; Masked load merged with a pass-through value selects vpblendmb.
define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqb %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpblendmb (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
  ret <64 x i8>%res
}

; Masked load with zero pass-through selects zero-masked vmovdqu8.
define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
; CHECK-LABEL: test4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vpcmpneqb %zmm1, %zmm0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
  ret <64 x i8>%res
}

; Unmasked unaligned 32 x i16 load lowers to a single vmovdqu16.
define <32 x i16> @test5(i8 * %addr) {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
  ret <32 x i16>%res
}

; Unmasked unaligned 32 x i16 store lowers to a single vmovdqu16.
define void @test6(i8 * %addr, <32 x i16> %data) {
; CHECK-LABEL: test6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu16 %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  store <32 x i16>%data, <32 x i16>* %vaddr, align 1
  ret void
}

; Masked word load merged with a pass-through value selects vpblendmw.
define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqw %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpblendmw (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
  ret <32 x i16>%res
}

; Masked word load with zero pass-through selects zero-masked vmovdqu16.
define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vpcmpneqw %zmm1, %zmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
  ret <32 x i16>%res
}

; Sub-512-bit masked loads are widened to zmm with the k-register
; shifted to clear the unused mask bits.
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_load_16xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $48, %k0, %k0
; CHECK-NEXT:    kshiftrq $48, %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)

define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_load_32xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $32, %k0, %k0
; CHECK-NEXT:    kshiftrq $32, %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_load_8xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_load_16xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $16, %k0, %k0
; CHECK-NEXT:    kshiftrd $16, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)

; Sub-512-bit masked stores are likewise widened to zmm with the
; k-register shifted to clear the unused mask bits.
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $48, %k0, %k0
; CHECK-NEXT:    kshiftrq $48, %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)

define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $32, %k0, %k0
; CHECK-NEXT:    kshiftrq $32, %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)

define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)

define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $16, %k0, %k0
; CHECK-NEXT:    kshiftrd $16, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)