; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; rdar://12471808

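; The functions below build the bit-select pattern (a & b) | (~a & c) out of
; plain and/xor/or instructions; each one should be matched to a single NEON
; VBSL instruction.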
define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: v_bsli8:
;CHECK: vbsl
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = load <8 x i8>* %C
	%tmp4 = and <8 x i8> %tmp1, %tmp2
	%tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
	%tmp6 = and <8 x i8> %tmp5, %tmp3
	%tmp7 = or <8 x i8> %tmp4, %tmp6
	ret <8 x i8> %tmp7
}

define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: v_bsli16:
;CHECK: vbsl
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = load <4 x i16>* %C
	%tmp4 = and <4 x i16> %tmp1, %tmp2
	%tmp5 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
	%tmp6 = and <4 x i16> %tmp5, %tmp3
	%tmp7 = or <4 x i16> %tmp4, %tmp6
	ret <4 x i16> %tmp7
}

define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: v_bsli32:
;CHECK: vbsl
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = load <2 x i32>* %C
	%tmp4 = and <2 x i32> %tmp1, %tmp2
	%tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
	%tmp6 = and <2 x i32> %tmp5, %tmp3
	%tmp7 = or <2 x i32> %tmp4, %tmp6
	ret <2 x i32> %tmp7
}

define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
;CHECK-LABEL: v_bsli64:
;CHECK: vbsl
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = load <1 x i64>* %C
	%tmp4 = and <1 x i64> %tmp1, %tmp2
	%tmp5 = xor <1 x i64> %tmp1, < i64 -1 >
	%tmp6 = and <1 x i64> %tmp5, %tmp3
	%tmp7 = or <1 x i64> %tmp4, %tmp6
	ret <1 x i64> %tmp7
}

define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: v_bslQi8:
;CHECK: vbsl
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = load <16 x i8>* %C
	%tmp4 = and <16 x i8> %tmp1, %tmp2
	%tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
	%tmp6 = and <16 x i8> %tmp5, %tmp3
	%tmp7 = or <16 x i8> %tmp4, %tmp6
	ret <16 x i8> %tmp7
}

define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: v_bslQi16:
;CHECK: vbsl
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = load <8 x i16>* %C
	%tmp4 = and <8 x i16> %tmp1, %tmp2
	%tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
	%tmp6 = and <8 x i16> %tmp5, %tmp3
	%tmp7 = or <8 x i16> %tmp4, %tmp6
	ret <8 x i16> %tmp7
}

define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: v_bslQi32:
;CHECK: vbsl
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = load <4 x i32>* %C
	%tmp4 = and <4 x i32> %tmp1, %tmp2
	%tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
	%tmp6 = and <4 x i32> %tmp5, %tmp3
	%tmp7 = or <4 x i32> %tmp4, %tmp6
	ret <4 x i32> %tmp7
}

define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: v_bslQi64:
;CHECK: vbsl
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = load <2 x i64>* %C
	%tmp4 = and <2 x i64> %tmp1, %tmp2
	%tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
	%tmp6 = and <2 x i64> %tmp5, %tmp3
	%tmp7 = or <2 x i64> %tmp4, %tmp6
	ret <2 x i64> %tmp7
}

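; The functions below call the @llvm.arm.neon.vbsl intrinsic directly and
; should also lower to VBSL; the i64 variants additionally check that the
; d-register (64-bit) and q-register (128-bit) forms are selected.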
define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f1:
; CHECK: vbsl
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
  ret <8 x i8> %vbsl.i
}

define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f2:
; CHECK: vbsl
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
  ret <4 x i16> %vbsl3.i
}

define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f3:
; CHECK: vbsl
  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
  ret <2 x i32> %vbsl3.i
}

define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f4:
; CHECK: vbsl
  %vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %vbsl4.i
}

define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g1:
; CHECK: vbsl
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g2:
; CHECK: vbsl
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
  ret <8 x i16> %vbsl3.i
}

define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g3:
; CHECK: vbsl
  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
  ret <4 x i32> %vbsl3.i
}

define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g4:
; CHECK: vbsl
  %vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind
  ret <4 x float> %vbsl4.i
}

define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_s64:
; CHECK: vbsl d
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
  ret <1 x i64> %vbsl3.i
}

define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_u64:
; CHECK: vbsl d
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
  ret <1 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_s64:
; CHECK: vbsl q
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
  ret <2 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_u64:
; CHECK: vbsl q
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
  ret <2 x i64> %vbsl3.i
}

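; Declarations of the vbsl intrinsic for each vector type used above.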
declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) nounwind readnone