; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s

; This file tests AVX-512BW code generation for 512-bit i8/i16 vector memory
; operations: plain unaligned loads/stores (test1-test8, including merge- and
; zero-masked forms expressed as icmp+select), and the llvm.masked.load /
; llvm.masked.store intrinsics at sub-512-bit widths (test_mask_load_* /
; test_mask_store_*), which on KNL (+avx512bw, no +avx512vl) must be widened
; to 512-bit zmm operations with the mask confined to the low lanes.
; The CHECK assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Unaligned (align 1) <64 x i8> load: lowers to a single plain vmovups.
define <64 x i8> @test1(i8 * %addr) {
; CHECK-LABEL: test1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
  ret <64 x i8>%res
}

; Unaligned <64 x i8> store: lowers to a single plain vmovups.
define void @test2(i8 * %addr, <64 x i8> %data) {
; CHECK-LABEL: test2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  store <64 x i8>%data, <64 x i8>* %vaddr, align 1
  ret void
}

; Merge-masked <64 x i8> load (icmp-ne mask + select over %old):
; the mask materializes via vptestmb and the load folds into vmovdqu8 {%k1}.
define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
; CHECK-LABEL: test3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
  ret <64 x i8>%res
}

; Zero-masked <64 x i8> load (select against zeroinitializer):
; same as test3 but uses the zeroing form vmovdqu8 {%k1} {z}.
define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
; CHECK-LABEL: test4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
  ret <64 x i8>%res
}

; Unaligned <32 x i16> load: lowers to a single plain vmovups.
define <32 x i16> @test5(i8 * %addr) {
; CHECK-LABEL: test5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
  ret <32 x i16>%res
}

; Unaligned <32 x i16> store: lowers to a single plain vmovups.
define void @test6(i8 * %addr, <32 x i16> %data) {
; CHECK-LABEL: test6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  store <32 x i16>%data, <32 x i16>* %vaddr, align 1
  ret void
}

; Merge-masked <32 x i16> load: vptestmw mask + merging vmovdqu16 {%k1}.
define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
; CHECK-LABEL: test7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
  ret <32 x i16>%res
}

; Zero-masked <32 x i16> load: zeroing form vmovdqu16 {%k1} {z}.
define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
; CHECK-LABEL: test8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
  ret <32 x i16>%res
}

; masked.load of <16 x i8> with undef passthrough: without +avx512vl the
; operation is widened to zmm; the <16 x i1> mask (i8 lanes in xmm0) is
; turned into a k-register via vpsllw $7 + vpmovb2m, then narrowed with
; kmovw so only the low 16 mask bits are active.
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_load_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)

; masked.load of <32 x i8> with zero passthrough: as above but the mask
; arrives in ymm0 and is narrowed with kmovd (low 32 bits).
define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_load_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovd %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

; masked.load of <8 x i16>: the <8 x i1> mask (i16 lanes) goes through
; vpsllw $15 + vpmovw2m; kshiftld/kshiftrd by 24 clears all but the low
; 8 bits of the 32-bit k-register.
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_load_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

; masked.load of <16 x i16> with zero passthrough: the <16 x i1> mask is
; i8 lanes in xmm0 (hence vpmovb2m), narrowed with kmovw, feeding a
; zero-masked vmovdqu16.
define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_load_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)

; masked.store of <16 x i8>: %val is widened to zmm (the kill comment) and
; stored with a masked vmovdqu8; mask setup mirrors test_mask_load_16xi8.
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)

; masked.store of <32 x i8>: ymm value widened to zmm, kmovd mask narrow.
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovd %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)

; masked.store of <8 x i16>: mask setup mirrors test_mask_load_8xi16.
define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)

; masked.store of <16 x i16>: mask setup mirrors test_mask_load_16xi16.
define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)