; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Codegen and machine-encoding tests for the AVX-512ER intrinsics
; (rsqrt28, rcp28, exp2), run once for a 32-bit and once for a 64-bit triple.
;
; Conventions visible in the expected output below:
;   * a trailing i32 operand of 8 produces the {sae} form of the instruction,
;     a trailing operand of 4 produces the plain form;
;   * an all-ones mask (-1) produces an unmasked instruction;
;   * a zeroinitializer or undef pass-through combined with a real mask
;     produces the zero-masking {z} form.

; Packed single rsqrt28, unmasked, {sae}.
define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

; Packed single rsqrt28, constant mask 6 merged into the pass-through %a1, {sae}.
define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test1_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
  ret <16 x float> %res
}

; Packed single rsqrt28, constant mask 6 with an undef pass-through, plain form:
; expected to select the zero-masking encoding.
define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test2_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4)
  ret <16 x float> %res
}

; Same as test2 but with an explicit zeroinitializer pass-through.
define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test3_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4)
  ret <16 x float> %res
}

; Same as test2 but requesting the {sae} form.
define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test4_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

; Packed single rcp28, unmasked, {sae}.
define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp28_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

; Packed double rcp28, unmasked, {sae}.
define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_rcp28_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone

; Packed single exp2, unmasked, {sae}.
define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_exp2_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

; Packed double exp2, unmasked, {sae}.
define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_exp2_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone

; Scalar single rsqrt28 with both sources equal to %a0, unmasked, {sae}.
define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt28_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

; Scalar single rcp28 with both sources equal to %a0, unmasked, {sae}.
define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp28_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

; Scalar single rcp28, unmasked: the full-vector load of the second source is
; expected to fold into the instruction's memory operand.
define <4 x float> @test_rcp28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; X86-LABEL: test_rcp28_ss_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrcp28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rcp28_ss_load:
; X64:       # %bb.0:
; X64-NEXT:    vrcp28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %res
}

; Scalar single rsqrt28, unmasked: the full-vector load of the second source is
; expected to fold into the instruction's memory operand.
define <4 x float> @test_rsqrt28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; X86-LABEL: test_rsqrt28_ss_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrsqrt28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_ss_load:
; X64:       # %bb.0:
; X64-NEXT:    vrsqrt28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %res
}

; Scalar single rsqrt28, variable mask with zeroinitializer pass-through, {sae}.
define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) {
; X86-LABEL: test_rsqrt28_ss_maskz:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_ss_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 %mask, i32 8)
  ret <4 x float> %res
}

; Scalar single rsqrt28, variable mask merged into the pass-through %c0, {sae}.
define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) {
; X86-LABEL: test_rsqrt28_ss_mask:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_ss_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask, i32 8)
  ret <4 x float> %res
}

; Scalar double rcp28, variable mask merged into the pass-through %a2.
; The <2 x double> value loaded from %a1ptr is the second source operand, so the
; load is expected to fold into the instruction's memory operand.
define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, <2 x double>* %a1ptr, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_rcp28_sd_mask_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vrcp28sd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0x08]
; X86-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rcp28_sd_mask_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrcp28sd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0x0f]
; X64-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

; Scalar double rsqrt28, variable mask with zeroinitializer pass-through.
; The <2 x double> value loaded from %a1ptr is the second source operand, so the
; load is expected to fold into the instruction's memory operand.
define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, <2 x double>* %a1ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
  ret <2 x double> %res
}

; Scalar double rsqrt28, variable mask with zeroinitializer pass-through, {sae}.
define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 8)
  ret <2 x double> %res
}

; Scalar double rsqrt28, variable mask merged into the pass-through %c0, {sae}.
define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_mask:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
; X86-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
; X64-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask, i32 8)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

; Scalar double rsqrt28, zero-masked: a scalar double load inserted into lane 0
; of the second source is expected to fold into the memory operand.
define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_mem:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz_mem:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %mem = load double, double* %ptr, align 8
  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4)
  ret <2 x double> %res
}

; As test_rsqrt28_sd_maskz_mem, but loading element 18 of %ptr: the expected
; memory operand is 144 bytes past the base, encoded as the single
; displacement byte 0x12.
define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz_mem_offset:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
; X64-NEXT:    retq # encoding: [0xc3]
  %ptr1 = getelementptr double, double* %ptr, i32 18
  %mem = load double, double* %ptr1, align 8
  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4)
  ret <2 x double> %res
}