Home | History | Annotate | Download | only in AArch64
      ; Test driver line: compiles this file with llc for arm64, using the
      ; Apple flavour of NEON syntax, and pipes the assembly into FileCheck.
      1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
      2 
      3 
      4 define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind {
      5 ;CHECK-LABEL: fcmltz_4s:
      6 ;CHECK: fcmlt.4s [[REG:v[0-9]+]], v0, #0
      7 ;CHECK-NEXT: xtn.4h v[[REG_1:[0-9]+]], [[REG]]
      8 ;CHECK-NEXT: str d[[REG_1]], [x0]
      9 ;CHECK-NEXT: ret
        ; Ordered less-than compare of %a against zero, sign-extended to
        ; <4 x i16> and stored through %p. The directives above expect the
        ; compare-with-#0 form of fcmlt, then a narrowing xtn, then one
        ; d-register store and the return, with nothing in between.
     10   %lt_zero = fcmp olt <4 x float> %a, zeroinitializer
     11   %lanes16 = sext <4 x i1> %lt_zero to <4 x i16>
     12   store <4 x i16> %lanes16, <4 x i16>* %p, align 8
     13   ret void
     14 }
     15 
     16 define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
     17 ;CHECK-LABEL: facge_2s:
     18 ;CHECK: facge.2s
        ; Both <2 x float> operands are loaded from memory and handed to the
        ; facge intrinsic; its <2 x i32> result is returned unchanged.
     19   %lhs = load <2 x float>, <2 x float>* %A
     20   %rhs = load <2 x float>, <2 x float>* %B
     21   %mask = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %lhs, <2 x float> %rhs)
     22   ret <2 x i32> %mask
     23 }
     24 
     25 define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
     26 ;CHECK-LABEL: facge_4s:
     27 ;CHECK: facge.4s
        ; <4 x float> variant: load the two operands, call the facge
        ; intrinsic, return the <4 x i32> it produces.
     28   %lhs = load <4 x float>, <4 x float>* %A
     29   %rhs = load <4 x float>, <4 x float>* %B
     30   %mask = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %lhs, <4 x float> %rhs)
     31   ret <4 x i32> %mask
     32 }
     33 
     34 define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
     35 ;CHECK-LABEL: facge_2d:
     36 ;CHECK: facge.2d
        ; <2 x double> variant: load the two operands, call the facge
        ; intrinsic, return the <2 x i64> it produces.
     37   %lhs = load <2 x double>, <2 x double>* %A
     38   %rhs = load <2 x double>, <2 x double>* %B
     39   %mask = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %lhs, <2 x double> %rhs)
     40   ret <2 x i64> %mask
     41 }
     42 
     ; Vector facge intrinsics, one declaration per lane layout used by the
     ; facge_2s, facge_4s and facge_2d functions in this file.
     43 declare <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
     44 declare <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
     45 declare <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
     46 
     47 define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
     48 ;CHECK-LABEL: facgt_2s:
     49 ;CHECK: facgt.2s
        ; Both <2 x float> operands are loaded from memory and handed to the
        ; facgt intrinsic; its <2 x i32> result is returned unchanged.
     50   %lhs = load <2 x float>, <2 x float>* %A
     51   %rhs = load <2 x float>, <2 x float>* %B
     52   %mask = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %lhs, <2 x float> %rhs)
     53   ret <2 x i32> %mask
     54 }
     55 
     56 define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
     57 ;CHECK-LABEL: facgt_4s:
     58 ;CHECK: facgt.4s
        ; <4 x float> variant: load the two operands, call the facgt
        ; intrinsic, return the <4 x i32> it produces.
     59   %lhs = load <4 x float>, <4 x float>* %A
     60   %rhs = load <4 x float>, <4 x float>* %B
     61   %mask = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %lhs, <4 x float> %rhs)
     62   ret <4 x i32> %mask
     63 }
     64 
     65 define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
     66 ;CHECK-LABEL: facgt_2d:
     67 ;CHECK: facgt.2d
        ; <2 x double> variant: load the two operands, call the facgt
        ; intrinsic, return the <2 x i64> it produces.
     68   %lhs = load <2 x double>, <2 x double>* %A
     69   %rhs = load <2 x double>, <2 x double>* %B
     70   %mask = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %lhs, <2 x double> %rhs)
     71   ret <2 x i64> %mask
     72 }
     73 
     ; Vector facgt intrinsics, one declaration per lane layout used by the
     ; facgt_2s, facgt_4s and facgt_2d functions in this file.
     74 declare <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
     75 declare <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
     76 declare <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
     77 
     78 define i32 @facge_s(float %A, float %B) nounwind {
     79 ; CHECK-LABEL: facge_s:
     80 ; CHECK: facge {{s[0-9]+}}, s0, s1
        ; Scalar single-precision form: the two float arguments go straight
        ; into the facge intrinsic and its i32 result is returned. The
        ; directive above pins the source registers to s0 and s1.
     81   %cmp = call i32 @llvm.aarch64.neon.facge.i32.f32(float %A, float %B)
     82   ret i32 %cmp
     83 }
     84 
     85 define i64 @facge_d(double %A, double %B) nounwind {
     86 ; CHECK-LABEL: facge_d:
     87 ; CHECK: facge {{d[0-9]+}}, d0, d1
        ; Scalar double-precision form: the two double arguments go straight
        ; into the facge intrinsic and its i64 result is returned. The
        ; directive above pins the source registers to d0 and d1.
     88   %cmp = call i64 @llvm.aarch64.neon.facge.i64.f64(double %A, double %B)
     89   ret i64 %cmp
     90 }
     91 
     ; Scalar facge intrinsics called by facge_d and facge_s in this file.
     92 declare i64 @llvm.aarch64.neon.facge.i64.f64(double, double)
     93 declare i32 @llvm.aarch64.neon.facge.i32.f32(float, float)
     94 
     95 define i32 @facgt_s(float %A, float %B) nounwind {
     96 ; CHECK-LABEL: facgt_s:
     97 ; CHECK: facgt {{s[0-9]+}}, s0, s1
        ; Scalar single-precision form: the two float arguments go straight
        ; into the facgt intrinsic and its i32 result is returned. The
        ; directive above pins the source registers to s0 and s1.
     98   %cmp = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %A, float %B)
     99   ret i32 %cmp
    100 }
    101 
    102 define i64 @facgt_d(double %A, double %B) nounwind {
    103 ; CHECK-LABEL: facgt_d:
    104 ; CHECK: facgt {{d[0-9]+}}, d0, d1
        ; Scalar double-precision form: the two double arguments go straight
        ; into the facgt intrinsic and its i64 result is returned. The
        ; directive above pins the source registers to d0 and d1.
    105   %cmp = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %A, double %B)
    106   ret i64 %cmp
    107 }
    108 
    ; Scalar facgt intrinsics called by facgt_d and facgt_s in this file.
    109 declare i64 @llvm.aarch64.neon.facgt.i64.f64(double, double)
    110 declare i32 @llvm.aarch64.neon.facgt.i32.f32(float, float)
    111 
    112 define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    113 ;CHECK-LABEL: cmtst_8b:
    114 ;CHECK: cmtst.8b
        ; Pattern under test: AND the two loaded vectors, compare the result
        ; against zero with "ne", and sign-extend the i1 lanes back to i8.
        ; The directive above expects this to be emitted as cmtst.8b.
    115   %lhs = load <8 x i8>, <8 x i8>* %A
    116   %rhs = load <8 x i8>, <8 x i8>* %B
    117   %shared = and <8 x i8> %lhs, %rhs
    118   %any_set = icmp ne <8 x i8> %shared, zeroinitializer
    119   %lanes = sext <8 x i1> %any_set to <8 x i8>
    120   ret <8 x i8> %lanes
    121 }
    122 
    123 define <16 x i8> @cmtst_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    124 ;CHECK-LABEL: cmtst_16b:
    125 ;CHECK: cmtst.16b
        ; <16 x i8> variant of the and / icmp ne zero / sext pattern; the
        ; directive above expects a cmtst.16b.
    126   %lhs = load <16 x i8>, <16 x i8>* %A
    127   %rhs = load <16 x i8>, <16 x i8>* %B
    128   %shared = and <16 x i8> %lhs, %rhs
    129   %any_set = icmp ne <16 x i8> %shared, zeroinitializer
    130   %lanes = sext <16 x i1> %any_set to <16 x i8>
    131   ret <16 x i8> %lanes
    132 }
    133 
    134 define <4 x i16> @cmtst_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    135 ;CHECK-LABEL: cmtst_4h:
    136 ;CHECK: cmtst.4h
        ; <4 x i16> variant of the and / icmp ne zero / sext pattern; the
        ; directive above expects a cmtst.4h.
    137   %lhs = load <4 x i16>, <4 x i16>* %A
    138   %rhs = load <4 x i16>, <4 x i16>* %B
    139   %shared = and <4 x i16> %lhs, %rhs
    140   %any_set = icmp ne <4 x i16> %shared, zeroinitializer
    141   %lanes = sext <4 x i1> %any_set to <4 x i16>
    142   ret <4 x i16> %lanes
    143 }
    144 
    145 define <8 x i16> @cmtst_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    146 ;CHECK-LABEL: cmtst_8h:
    147 ;CHECK: cmtst.8h
        ; <8 x i16> variant of the and / icmp ne zero / sext pattern; the
        ; directive above expects a cmtst.8h.
    148   %lhs = load <8 x i16>, <8 x i16>* %A
    149   %rhs = load <8 x i16>, <8 x i16>* %B
    150   %shared = and <8 x i16> %lhs, %rhs
    151   %any_set = icmp ne <8 x i16> %shared, zeroinitializer
    152   %lanes = sext <8 x i1> %any_set to <8 x i16>
    153   ret <8 x i16> %lanes
    154 }
    155 
    156 define <2 x i32> @cmtst_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    157 ;CHECK-LABEL: cmtst_2s:
    158 ;CHECK: cmtst.2s
        ; <2 x i32> variant of the and / icmp ne zero / sext pattern; the
        ; directive above expects a cmtst.2s.
    159   %lhs = load <2 x i32>, <2 x i32>* %A
    160   %rhs = load <2 x i32>, <2 x i32>* %B
    161   %shared = and <2 x i32> %lhs, %rhs
    162   %any_set = icmp ne <2 x i32> %shared, zeroinitializer
    163   %lanes = sext <2 x i1> %any_set to <2 x i32>
    164   ret <2 x i32> %lanes
    165 }
    166 
    167 define <4 x i32> @cmtst_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    168 ;CHECK-LABEL: cmtst_4s:
    169 ;CHECK: cmtst.4s
        ; <4 x i32> variant of the and / icmp ne zero / sext pattern; the
        ; directive above expects a cmtst.4s.
    170   %lhs = load <4 x i32>, <4 x i32>* %A
    171   %rhs = load <4 x i32>, <4 x i32>* %B
    172   %shared = and <4 x i32> %lhs, %rhs
    173   %any_set = icmp ne <4 x i32> %shared, zeroinitializer
    174   %lanes = sext <4 x i1> %any_set to <4 x i32>
    175   ret <4 x i32> %lanes
    176 }
    177 
    178 define <2 x i64> @cmtst_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
    179 ;CHECK-LABEL: cmtst_2d:
    180 ;CHECK: cmtst.2d
        ; <2 x i64> variant of the and / icmp ne zero / sext pattern; the
        ; directive above expects a cmtst.2d.
    181   %lhs = load <2 x i64>, <2 x i64>* %A
    182   %rhs = load <2 x i64>, <2 x i64>* %B
    183   %shared = and <2 x i64> %lhs, %rhs
    184   %any_set = icmp ne <2 x i64> %shared, zeroinitializer
    185   %lanes = sext <2 x i1> %any_set to <2 x i64>
    186   ret <2 x i64> %lanes
    187 }
    188 
    189 define <1 x i64> @fcmeq_d(<1 x double> %A, <1 x double> %B) nounwind {
    190 ; CHECK-LABEL: fcmeq_d:
    191 ; CHECK: fcmeq {{d[0-9]+}}, d0, d1
        ; Ordered-equal compare on <1 x double>, sign-extended to <1 x i64>.
        ; The directive above expects a scalar fcmeq reading d0 then d1.
    192   %is_eq = fcmp oeq <1 x double> %A, %B
    193   %wide = sext <1 x i1> %is_eq to <1 x i64>
    194   ret <1 x i64> %wide
    195 }
    196 
    197 define <1 x i64> @fcmge_d(<1 x double> %A, <1 x double> %B) nounwind {
    198 ; CHECK-LABEL: fcmge_d:
    199 ; CHECK: fcmge {{d[0-9]+}}, d0, d1
        ; Ordered greater-or-equal compare on <1 x double>, sign-extended to
        ; <1 x i64>. The directive above expects fcmge reading d0 then d1.
    200   %is_ge = fcmp oge <1 x double> %A, %B
    201   %wide = sext <1 x i1> %is_ge to <1 x i64>
    202   ret <1 x i64> %wide
    203 }
    204 
    205 define <1 x i64> @fcmle_d(<1 x double> %A, <1 x double> %B) nounwind {
    206 ; CHECK-LABEL: fcmle_d:
    207 ; CHECK: fcmge {{d[0-9]+}}, d1, d0
        ; Ordered less-or-equal compare on <1 x double>, sign-extended to
        ; <1 x i64>. The directive above expects it to come out as fcmge
        ; with the source operands swapped (d1 first, then d0).
    208   %is_le = fcmp ole <1 x double> %A, %B
    209   %wide = sext <1 x i1> %is_le to <1 x i64>
    210   ret <1 x i64> %wide
    211 }
    212 
    213 define <1 x i64> @fcmgt_d(<1 x double> %A, <1 x double> %B) nounwind {
    214 ; CHECK-LABEL: fcmgt_d:
    215 ; CHECK: fcmgt {{d[0-9]+}}, d0, d1
        ; Ordered greater-than compare on <1 x double>, sign-extended to
        ; <1 x i64>. The directive above expects fcmgt reading d0 then d1.
    216   %is_gt = fcmp ogt <1 x double> %A, %B
    217   %wide = sext <1 x i1> %is_gt to <1 x i64>
    218   ret <1 x i64> %wide
    219 }
    220 
    221 define <1 x i64> @fcmlt_d(<1 x double> %A, <1 x double> %B) nounwind {
    222 ; CHECK-LABEL: fcmlt_d:
    223 ; CHECK: fcmgt {{d[0-9]+}}, d1, d0
        ; Ordered less-than compare on <1 x double>, sign-extended to
        ; <1 x i64>. The directive above expects it to come out as fcmgt
        ; with the source operands swapped (d1 first, then d0).
    224   %is_lt = fcmp olt <1 x double> %A, %B
    225   %wide = sext <1 x i1> %is_lt to <1 x i64>
    226   ret <1 x i64> %wide
    227 }
    228 
    229 define <1 x i64> @cmnez_d(<1 x i64> %A) nounwind {
    230 ; CHECK-LABEL: cmnez_d:
    231 ; CHECK: cmeq d[[EQ:[0-9]+]], d0, #0
    232 ; CHECK: mvn.8b v0, v[[EQ]]
        ; Integer "not equal to zero" on <1 x i64>, sign-extended to
        ; <1 x i64>. The directives above expect a cmeq against #0 whose
        ; result register is then fed to mvn.8b, writing v0.
    233   %nonzero = icmp ne <1 x i64> %A, zeroinitializer
    234   %wide = sext <1 x i1> %nonzero to <1 x i64>
    235   ret <1 x i64> %wide
    236 }
    237