; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding | FileCheck %s

define <32 x i8> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <32 x i8>*
  %res = load <32 x i8>, <32 x i8>* %vaddr, align 1
  ret <32 x i8>%res
}

define void @test_256_2(i8 * %addr, <32 x i8> %data) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <32 x i8>*
  store <32 x i8>%data, <32 x i8>* %vaddr, align 1
  ret void
}

define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x26,0xc9]
; CHECK-NEXT:    vmovdqu8 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <32 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i8>*
  %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> %old
  ret <32 x i8>%res
}

define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc8]
; CHECK-NEXT:    vmovdqu8 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <32 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i8>*
  %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> zeroinitializer
  ret <32 x i8>%res
}

define <16 x i16> @test_256_5(i8 * %addr) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i16>*
  %res = load <16 x i16>, <16 x i16>* %vaddr, align 1
  ret <16 x i16>%res
}

define void @test_256_6(i8 * %addr, <16 x i16> %data) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i16>*
  store <16 x i16>%data, <16 x i16>* %vaddr, align 1
  ret void
}

define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x26,0xc9]
; CHECK-NEXT:    vmovdqu16 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i16>*
  %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> %old
  ret <16 x i16>%res
}

define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc8]
; CHECK-NEXT:    vmovdqu16 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i16>*
  %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> zeroinitializer
  ret <16 x i16>%res
}

define <16 x i8> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i8>*
  %res = load <16 x i8>, <16 x i8>* %vaddr, align 1
  ret <16 x i8>%res
}

define void @test_128_2(i8 * %addr, <16 x i8> %data) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i8>*
  store <16 x i8>%data, <16 x i8>* %vaddr, align 1
  ret void
}

define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x26,0xc9]
; CHECK-NEXT:    vmovdqu8 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i8>*
  %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> %old
  ret <16 x i8>%res
}

define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc8]
; CHECK-NEXT:    vmovdqu8 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i8>*
  %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> zeroinitializer
  ret <16 x i8>%res
}

define <8 x i16> @test_128_5(i8 * %addr) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i16>*
  %res = load <8 x i16>, <8 x i16>* %vaddr, align 1
  ret <8 x i16>%res
}

define void @test_128_6(i8 * %addr, <8 x i16> %data) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i16>*
  store <8 x i16>%data, <8 x i16>* %vaddr, align 1
  ret void
}

define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x26,0xc9]
; CHECK-NEXT:    vmovdqu16 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i16>*
  %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> %old
  ret <8 x i16>%res
}

define <8 x i16> @test_128_8(i8 * %addr, <8 x i16> %mask1) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc8]
; CHECK-NEXT:    vmovdqu16 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i16>*
  %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> zeroinitializer
  ret <8 x i16>%res
}