Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
      4 
      5 define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
      6 ; CHECK-LABEL: test_rsqrt28_ps:
      7 ; CHECK:       # %bb.0:
      8 ; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
      9 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Packed rsqrt28.ps called with an all-ones mask (i16 -1) and a final
          ; operand of i32 8; the expected output is the unmasked {sae} form.
     10   %rsqrt = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
     11   ret <16 x float> %rsqrt
     12 }
     13 
     14 define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
     15 ; CHECK-LABEL: test1_rsqrt28_ps:
     16 ; CHECK:       # %bb.0:
     17 ; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
     18 ; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
     19 ; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
     20 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
     21 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Constant mask 6 with %a1 as the second vector operand and final operand
          ; i32 8: the expected code materialises the mask in k1, writes zmm1 under
          ; {%k1} with {sae}, then copies zmm1 into zmm0 for the return.
     22   %merged = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
     23   ret <16 x float> %merged
     24 }
     25 
     26 define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) {
     27 ; CHECK-LABEL: test2_rsqrt28_ps:
     28 ; CHECK:       # %bb.0:
     29 ; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
     30 ; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
     31 ; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
     32 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Constant mask 6 with an undef second vector operand and final operand
          ; i32 4: the expected instruction is zero-masked ({%k1} {z}) and carries
          ; no {sae} marker.
     33   %masked = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4)
     34   ret <16 x float> %masked
     35 }
     36 
     37 define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
     38 ; CHECK-LABEL: test3_rsqrt28_ps:
     39 ; CHECK:       # %bb.0:
     40 ; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
     41 ; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
     42 ; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
     43 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Same shape as test2_rsqrt28_ps but with an explicit zeroinitializer as
          ; the second vector operand; the expected zero-masked output is identical.
     44   %zeroed = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4)
     45   ret <16 x float> %zeroed
     46 }
     47 
     48 define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
     49 ; CHECK-LABEL: test4_rsqrt28_ps:
     50 ; CHECK:       # %bb.0:
     51 ; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
     52 ; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
     53 ; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
     54 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Constant mask 6, undef second vector operand, final operand i32 8: the
          ; expected instruction combines {sae} with zero-masking ({%k1} {z}).
     55   %masked_sae = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
     56   ret <16 x float> %masked_sae
     57 }
     58 
     59 declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
     60 
     61 define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
     62 ; CHECK-LABEL: test_rcp28_ps_512:
     63 ; CHECK:       # %bb.0:
     64 ; CHECK-NEXT:    vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
     65 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Packed single-precision rcp28 with an all-ones mask (i16 -1) and final
          ; operand i32 8; expected output is the unmasked {sae} form.
     66   %rcp = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
     67   ret <16 x float> %rcp
     68 }
     69 declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
     70 
     71 define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
     72 ; CHECK-LABEL: test_rcp28_pd_512:
     73 ; CHECK:       # %bb.0:
     74 ; CHECK-NEXT:    vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
     75 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Packed double-precision rcp28 with an all-ones mask (i8 -1) and final
          ; operand i32 8; expected output is the unmasked {sae} form.
     76   %rcp = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
     77   ret <8 x double> %rcp
     78 }
     79 declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
     80 
     81 define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
     82 ; CHECK-LABEL: test_exp2_ps_512:
     83 ; CHECK:       # %bb.0:
     84 ; CHECK-NEXT:    vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
     85 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Packed single-precision exp2 with an all-ones mask (i16 -1) and final
          ; operand i32 8; expected output is the unmasked {sae} form.
     86   %exp2 = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
     87   ret <16 x float> %exp2
     88 }
     89 declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
     90 
     91 define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
     92 ; CHECK-LABEL: test_exp2_pd_512:
     93 ; CHECK:       # %bb.0:
     94 ; CHECK-NEXT:    vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
     95 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Packed double-precision exp2 with an all-ones mask (i8 -1) and final
          ; operand i32 8; expected output is the unmasked {sae} form.
     96   %exp2 = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
     97   ret <8 x double> %exp2
     98 }
     99 declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
    100 
    101 define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
    102 ; CHECK-LABEL: test_rsqrt28_ss:
    103 ; CHECK:       # %bb.0:
    104 ; CHECK-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
    105 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Scalar single-precision rsqrt28 with %a0 supplied as both vector sources,
          ; an all-ones mask (i8 -1) and final operand i32 8; expected output is the
          ; unmasked three-operand {sae} form.
    106   %rsqrt = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8)
    107   ret <4 x float> %rsqrt
    108 }
    109 declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
    110 
    111 define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
    112 ; CHECK-LABEL: test_rcp28_ss:
    113 ; CHECK:       # %bb.0:
    114 ; CHECK-NEXT:    vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
    115 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
          ; Scalar single-precision rcp28 with %a0 supplied as both vector sources,
          ; an all-ones mask (i8 -1) and final operand i32 8; expected output is the
          ; unmasked three-operand {sae} form.
    116   %rcp = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8)
    117   ret <4 x float> %rcp
    118 }
    119 declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
    120 
    121 define <4 x float> @test_rcp28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
    122 ; X86-LABEL: test_rcp28_ss_load:
    123 ; X86:       # %bb.0:
    124 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    125 ; X86-NEXT:    vrcp28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x00]
    126 ; X86-NEXT:    retl # encoding: [0xc3]
    127 ;
    128 ; X64-LABEL: test_rcp28_ss_load:
    129 ; X64:       # %bb.0:
    130 ; X64-NEXT:    vrcp28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x07]
    131 ; X64-NEXT:    retq # encoding: [0xc3]
          ; The second vector source comes from memory. The expected output uses the
          ; pointer directly as the instruction's memory operand (the i686 run first
          ; fetches the pointer from the stack). Final operand i32 4, so no {sae}.
    132   %src = load <4 x float>, <4 x float>* %a1ptr
    133   %rcp = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %src, <4 x float> undef, i8 -1, i32 4)
    134   ret <4 x float> %rcp
    135 }
    136 
    137 define <4 x float> @test_rsqrt28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
    138 ; X86-LABEL: test_rsqrt28_ss_load:
    139 ; X86:       # %bb.0:
    140 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    141 ; X86-NEXT:    vrsqrt28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x00]
    142 ; X86-NEXT:    retl # encoding: [0xc3]
    143 ;
    144 ; X64-LABEL: test_rsqrt28_ss_load:
    145 ; X64:       # %bb.0:
    146 ; X64-NEXT:    vrsqrt28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x07]
    147 ; X64-NEXT:    retq # encoding: [0xc3]
          ; The second vector source comes from memory. The expected output uses the
          ; pointer directly as the instruction's memory operand (the i686 run first
          ; fetches the pointer from the stack). Final operand i32 4, so no {sae}.
    148   %src = load <4 x float>, <4 x float>* %a1ptr
    149   %rsqrt = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %src, <4 x float> undef, i8 -1, i32 4)
    150   ret <4 x float> %rsqrt
    151 }
    152 
    153 define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) {
    154 ; X86-LABEL: test_rsqrt28_ss_maskz:
    155 ; X86:       # %bb.0:
    156 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
    157 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    158 ; X86-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
    159 ; X86-NEXT:    retl # encoding: [0xc3]
    160 ;
    161 ; X64-LABEL: test_rsqrt28_ss_maskz:
    162 ; X64:       # %bb.0:
    163 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    164 ; X64-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
    165 ; X64-NEXT:    retq # encoding: [0xc3]
          ; Run-time mask from %mask with a zeroinitializer third vector operand and
          ; final operand i32 8: the expected instruction is the zero-masked
          ; ({%k1} {z}) {sae} form.
    166   %masked = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 %mask, i32 8)
    167   ret <4 x float> %masked
    168 }
    169 
    170 define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) {
    171 ; X86-LABEL: test_rsqrt28_ss_mask:
    172 ; X86:       # %bb.0:
    173 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
    174 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    175 ; X86-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
    176 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
    177 ; X86-NEXT:    retl # encoding: [0xc3]
    178 ;
    179 ; X64-LABEL: test_rsqrt28_ss_mask:
    180 ; X64:       # %bb.0:
    181 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    182 ; X64-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
    183 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
    184 ; X64-NEXT:    retq # encoding: [0xc3]
          ; Run-time mask with three distinct vector operands and final operand
          ; i32 8: the expected code writes xmm2 under {%k1} (no {z}) with {sae},
          ; then copies xmm2 into xmm0 for the return.
    185   %merged = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask, i32 8)
    186   ret <4 x float> %merged
    187 }
    188 
    189 define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, <2 x double>* %a1ptr, <2 x double> %a2, i8 %mask) {
    190 ; X86-LABEL: test_rcp28_sd_mask_load:
    191 ; X86:       # %bb.0:
        ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    192 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
    193 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
    194 ; X86-NEXT:    vrcp28sd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0x08]
    195 ; X86-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
    196 ; X86-NEXT:    retl # encoding: [0xc3]
    197 ;
    198 ; X64-LABEL: test_rcp28_sd_mask_load:
    199 ; X64:       # %bb.0:
    200 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    201 ; X64-NEXT:    vrcp28sd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0x0f]
    202 ; X64-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
    203 ; X64-NEXT:    retq # encoding: [0xc3]
          ; Merge-masked scalar double rcp28 whose second source is loaded from
          ; %a1ptr. The loaded vector is now actually passed to the intrinsic;
          ; previously %a0 was passed twice, which left the load dead and made this
          ; test cover only the register form.
          ; NOTE(review): the expected lines above were updated by hand to the folded
          ; memory-operand form; rerun utils/update_llc_test_checks.py to confirm.
    204   %src = load <2 x double>, <2 x double>* %a1ptr
    205   %merged = call <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double> %a0, <2 x double> %src, <2 x double> %a2, i8 %mask, i32 4)
    206   ret <2 x double> %merged
    207 }
    208 declare <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
    209 
    210 define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, <2 x double>* %a1ptr, i8 %mask) {
    211 ; X86-LABEL: test_rsqrt28_sd_maskz_load:
    212 ; X86:       # %bb.0:
        ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    213 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
    214 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
    215 ; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
    216 ; X86-NEXT:    retl # encoding: [0xc3]
    217 ;
    218 ; X64-LABEL: test_rsqrt28_sd_maskz_load:
    219 ; X64:       # %bb.0:
    220 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    221 ; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
    222 ; X64-NEXT:    retq # encoding: [0xc3]
          ; Zero-masked scalar double rsqrt28 whose second source is loaded from
          ; %a1ptr. The loaded vector is now actually passed to the intrinsic;
          ; previously %a0 was passed twice, which left the load dead and made this
          ; test cover only the register form.
          ; NOTE(review): the expected lines above were updated by hand to mirror
          ; test_rsqrt28_sd_maskz_mem (same argument layout); rerun
          ; utils/update_llc_test_checks.py to confirm.
    223   %src = load <2 x double>, <2 x double>* %a1ptr
    224   %masked = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %src, <2 x double> zeroinitializer, i8 %mask, i32 4)
    225   ret <2 x double> %masked
    226 }
    227 
    228 define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) {
    229 ; X86-LABEL: test_rsqrt28_sd_maskz:
    230 ; X86:       # %bb.0:
    231 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
    232 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    233 ; X86-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
    234 ; X86-NEXT:    retl # encoding: [0xc3]
    235 ;
    236 ; X64-LABEL: test_rsqrt28_sd_maskz:
    237 ; X64:       # %bb.0:
    238 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    239 ; X64-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
    240 ; X64-NEXT:    retq # encoding: [0xc3]
          ; Run-time mask from %mask with a zeroinitializer third vector operand and
          ; final operand i32 8: the expected instruction is the zero-masked
          ; ({%k1} {z}) {sae} form.
    241   %masked = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 8)
    242   ret <2 x double> %masked
    243 }
    244 
    245 define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) {
    246 ; X86-LABEL: test_rsqrt28_sd_mask:
    247 ; X86:       # %bb.0:
    248 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
    249 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    250 ; X86-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
    251 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
    252 ; X86-NEXT:    retl # encoding: [0xc3]
    253 ;
    254 ; X64-LABEL: test_rsqrt28_sd_mask:
    255 ; X64:       # %bb.0:
    256 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    257 ; X64-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
    258 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
    259 ; X64-NEXT:    retq # encoding: [0xc3]
          ; Run-time mask with three distinct vector operands and final operand
          ; i32 8: the expected code writes xmm2 under {%k1} (no {z}) with {sae},
          ; then copies xmm2 into xmm0 for the return.
    260   %merged = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask, i32 8)
    261   ret <2 x double> %merged
    262 }
    263 
    264 declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
    265 
    266 define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr, i8 %mask) {
    267 ; X86-LABEL: test_rsqrt28_sd_maskz_mem:
    268 ; X86:       # %bb.0:
    269 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    270 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
    271 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
    272 ; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
    273 ; X86-NEXT:    retl # encoding: [0xc3]
    274 ;
    275 ; X64-LABEL: test_rsqrt28_sd_maskz_mem:
    276 ; X64:       # %bb.0:
    277 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    278 ; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
    279 ; X64-NEXT:    retq # encoding: [0xc3]
          ; A scalar double is loaded from %ptr and placed in lane 0 of an otherwise
          ; undef vector that serves as the second source. The expected output uses
          ; %ptr directly as the memory operand of the zero-masked instruction.
    280   %scalar = load double , double * %ptr, align 8
    281   %src = insertelement <2 x double> undef, double %scalar, i32 0
    282   %masked = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %src, <2 x double> zeroinitializer, i8 %mask, i32 4)
    283   ret <2 x double> %masked
    284 }
    285 
    285 define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr, i8 %mask) {
    286 ; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset:
    287 ; X86:       # %bb.0:
    288 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    289 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
    290 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
    291 ; X86-NEXT:    vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12]
    292 ; X86-NEXT:    retl # encoding: [0xc3]
    293 ;
    294 ; X64-LABEL: test_rsqrt28_sd_maskz_mem_offset:
    295 ; X64:       # %bb.0:
    296 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    297 ; X64-NEXT:    vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
    298 ; X64-NEXT:    retq # encoding: [0xc3]
          ; Same as test_rsqrt28_sd_maskz_mem, but the scalar is read from element
          ; 18 of %ptr. The expected operand is printed as 144(base) while the
          ; encoding ends in 0x12 (18) - presumably an 8-byte-scaled disp8.
    299   %elt_ptr = getelementptr double, double* %ptr, i32 18
    300   %scalar = load double , double * %elt_ptr, align 8
    301   %src = insertelement <2 x double> undef, double %scalar, i32 0
    302   %masked = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %src, <2 x double> zeroinitializer, i8 %mask, i32 4)
    303   ret <2 x double> %masked
    304 }
    306 
    307