; NEON (ARMv7) implementations of clamp() for float, float2, float3 and float4, written in LLVM IR.
; Module-level target description: the data layout string (it starts with "e",
; i.e. little-endian, and declares 32-bit pointers via "p:32:32:32") and the
; target triple this IR is written for.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"
      3 
      4 define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
      5   %1 = insertelement <4 x float> undef, float %in, i32 0
      6   %2 = insertelement <4 x float> %1, float %in, i32 1
      7   %3 = insertelement <4 x float> %2, float %in, i32 2
      8   %4 = insertelement <4 x float> %3, float %in, i32 3
      9   ret <4 x float> %4
     10 }
     11 
     12 define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
     13   %1 = insertelement <2 x float> undef, float %in, i32 0
     14   %2 = insertelement <2 x float> %1, float %in, i32 1
     15   ret <2 x float> %2
     16 }
     17 
; ARM NEON floating-point minimum/maximum intrinsics used by the clamp
; functions in this file. Each takes two vectors of the same type and returns a
; vector of that type; 2-lane and 4-lane float variants are declared.
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
     22 
     23 define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readonly {
     24   %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
     25   %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind readnone
     26   ret <4 x float> %2
     27 }
     28 
     29 define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
     30   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
     31   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
     32   %out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float> %_high) nounwind readonly
     33   ret <4 x float> %out
     34 }
     35 
     36 define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readonly {
     37   %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     38   %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     39   %_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     40   %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
     41   %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
     42   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
     43   ret <3 x float> %c
     44 }
     45 
     46 define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
     47   %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     48   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
     49   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
     50   %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
     51   %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
     52   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
     53   ret <3 x float> %c
     54 }
     55 
     56 
     57 define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readonly {
     58   %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
     59   %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind readnone
     60   ret <2 x float> %2
     61 }
     62 
     63 define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
     64   %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
     65   %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
     66   %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) nounwind readnone
     67   %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
     68   ret <2 x float> %b
     69 }
     70 
     71 
     72 define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
     73   %_value = tail call <2 x float> @smear_2f(float %value) nounwind readnone
     74   %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
     75   %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
     76   %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %_value, <2 x float> %_high) nounwind readnone
     77   %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
     78   %c = extractelement <2 x float> %b, i32 0
     79   ret float %c
     80 }
     81 
     82