Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
      2 
      3 ; CHECK-LABEL: f:
      4 define float @f(<4 x i16>* nocapture %in) {  ; unsigned <4 x i16> -> <4 x float>; returns lane0 + lane1 + lane2
      5   ; CHECK: vldr
      6   ; CHECK: vmovl.u16
      7   %1 = load <4 x i16>* %in                   ; load the four 16-bit lanes
      8   ; CHECK: vcvt.f32.u32
      9   %2 = uitofp <4 x i16> %1 to <4 x float>    ; expected lowering: widen u16 -> u32, then convert u32 -> f32
     10   %3 = extractelement <4 x float> %2, i32 0
     11   %4 = extractelement <4 x float> %2, i32 1
     12   %5 = extractelement <4 x float> %2, i32 2
     13   ; Only lanes 0..2 are summed; lane 3 of the converted vector is never read.
     14   ; CHECK: vadd.f32
     15   %6 = fadd float %3, %4
     16   %7 = fadd float %6, %5
     17 
     18   ret float %7
     19 }
     20 
     21 ; CHECK-LABEL: g:
     22 define float @g(<4 x i8>* nocapture %in) {  ; unsigned <4 x i8> -> <4 x float>; returns lane0 + lane1 + lane2
     23 ; Note: expecting a NEON vld1 for the load here is deliberate. Mixing VFP
     24 ; and NEON instructions is bad on some cores.
     25   ; CHECK: vld1
     26   ; CHECK: vmovl.u8
     27   ; CHECK: vmovl.u16
     28   %1 = load <4 x i8>* %in                   ; load the four 8-bit lanes
     29   ; CHECK: vcvt.f32.u32
     30   %2 = uitofp <4 x i8> %1 to <4 x float>    ; expected lowering: widen u8 -> u16 -> u32, then convert u32 -> f32
     31   %3 = extractelement <4 x float> %2, i32 0
     32   %4 = extractelement <4 x float> %2, i32 1
     33   %5 = extractelement <4 x float> %2, i32 2
     34   ; Only lanes 0..2 are summed; lane 3 of the converted vector is never read.
     35   ; CHECK: vadd.f32
     36   %6 = fadd float %3, %4
     37   %7 = fadd float %6, %5
     38 
     39   ret float %7
     40 }
     41 
     42 ; CHECK-LABEL: h:
     43 define <4 x i8> @h(<4 x float> %v) {  ; <4 x float> -> unsigned <4 x i8>
     44   ; CHECK: vcvt.{{[us]}}32.f32
     45   ; CHECK: vmovn.i32
     46   %1 = fptoui <4 x float> %v to <4 x i8>  ; expected lowering: convert to 32-bit ints (signed or unsigned form accepted), then narrow
     47   ret <4 x i8> %1
     48 }
     49 
     50 ; CHECK-LABEL: i:
     51 define <4 x i8> @i(<4 x i8>* %x) {  ; signed <4 x i8> division of the zero vector by the loaded vector
     52 ; Note: expecting a NEON vld1 for the load here is deliberate. Mixing VFP
     53 ; and NEON instructions is bad on some cores.
     54   ; CHECK: vld1
     55   ; CHECK: vmovl.s8
     56   ; CHECK: vmovl.s16
     57   ; CHECK: vrecpe
     58   ; CHECK: vrecps
     59   ; CHECK: vmul
     60   ; CHECK: vmovn
     61   %1 = load <4 x i8>* %x, align 4         ; divisor lanes
     62   %2 = sdiv <4 x i8> zeroinitializer, %1  ; dividend is all zeros; expected lowering sign-extends, uses a reciprocal estimate/step and multiply, then narrows
     63   ret <4 x i8> %2
     64 }
     65 ; CHECK-LABEL: j:
     66 define <4 x i32> @j(<4 x i8>* %in) nounwind {  ; zero-extends a loaded <4 x i8> to <4 x i32>
     67   ; CHECK: vld1
     68   ; CHECK: vmovl.u8
     69   ; CHECK: vmovl.u16
     70   ; CHECK-NOT: vand
     71   %1 = load <4 x i8>* %in, align 4
     72   %2 = zext <4 x i8> %1 to <4 x i32>  ; two unsigned widening moves are expected; no vand may follow them
     73   ret <4 x i32> %2
     74 }
     75 
     76