Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
      2 
      3 ; CHECK-LABEL: f:
      4 define float @f(<4 x i16>* nocapture %in) {
      5   ; The u16 -> f32 widening must use vmovl.u16 only; no masking vand expected.
      6   ; CHECK: vld1
      7   ; CHECK: vmovl.u16
      8   ; CHECK-NOT: vand
      9   ; CHECK: vcvt.f32.u32
     10   %halves = load <4 x i16>, <4 x i16>* %in
     11   %floats = uitofp <4 x i16> %halves to <4 x float>
     12   %lane0 = extractelement <4 x float> %floats, i32 0
     13   %lane1 = extractelement <4 x float> %floats, i32 1
     14   %lane2 = extractelement <4 x float> %floats, i32 2
     15   ; Sum of the first three lanes (lane 3 is unused).
     16   ; CHECK: vadd.f32
     17   %sum01 = fadd float %lane0, %lane1
     18   %sum012 = fadd float %sum01, %lane2
     19   ret float %sum012
     20 }
     21 
     22 ; CHECK-LABEL: g:
     23 define float @g(<4 x i16>* nocapture %in) {
     24   ; Scalar variant: only lane 0 of the loaded vector is converted to float.
     25   ;
     26   ; Current codegen emits a vmov.16 followed by a uxth. The uxth is not
     27   ; needed: the extension should be possible without any cross-domain
     28   ; copy. Update the expectations below once the backend achieves that.
     29   ;
     30   ; CHECK: vldr
     31   ; CHECK: uxth
     32   ; CHECK: vcvt.f32.u32
     33   %vec = load <4 x i16>, <4 x i16>* %in
     34   %elt0 = extractelement <4 x i16> %vec, i32 0
     35   %conv = uitofp i16 %elt0 to float
     36   ret float %conv
     37 }
     38 
     39 ; For the following code the backend generates an
     40 ; (and 0xff (i32 extract_vector_elt (zext load <4 x i8> to <4 x i16>)))
     41 ;
     42 ; The `and` is not redundant and cannot be removed. Since
     43 ; extract_vector_elt performs an implicit any_ext, the `and`
     44 ; is required to guarantee that the top bits are set to zero.
     45 
     46 ; Ideally this should be a single zext from <4 x i8> to <4 x i32>.
     47 
     48 ; CHECK-LABEL: h:
     49 ; CHECK: vld1.32
     50 ; CHECK: uxtb
     51 define <4 x i32> @h(<4 x i8>* %in) {
     52   %bytes = load <4 x i8>, <4 x i8>* %in, align 4
     53   %b0 = extractelement <4 x i8> %bytes, i32 0          ; lane 0: i8 -> i32
     54   %w0 = zext i8 %b0 to i32
     55   %v0 = insertelement <4 x i32> undef, i32 %w0, i32 0
     56   %b1 = extractelement <4 x i8> %bytes, i32 1          ; lane 1
     57   %w1 = zext i8 %b1 to i32
     58   %v1 = insertelement <4 x i32> %v0, i32 %w1, i32 1
     59   %b2 = extractelement <4 x i8> %bytes, i32 2          ; lane 2
     60   %w2 = zext i8 %b2 to i32
     61   %v2 = insertelement <4 x i32> %v1, i32 %w2, i32 2
     62   %b3 = extractelement <4 x i8> %bytes, i32 3          ; lane 3
     63   %w3 = zext i8 %b3 to i32
     64   %v3 = insertelement <4 x i32> %v2, i32 %w3, i32 3
     65   ret <4 x i32> %v3
     66 }
     67