Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
      2 
      3 ; An optimization in DAG Combiner to fold
      4 ; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)
      5 ; will generate nodes like:
      6 ;     v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
      7 ; By default, such nodes are scalarized during type legalization, but that
      8 ; scalarization causes an assertion failure, as v1i64 is a legal type on
      9 ; AArch64. We change the default behaviour from scalarizing to widening.
     10 
     11 ; FIXME: Currently XTN is generated for v1i32, but it could be optimized away,
     12 ; just as no XTN is generated for v1i16 and v1i8.
     13 
     14 define <2 x i32> @test_v1i32_0(<1 x i64> %in0) { ; v1i64 placed in lane 0, then truncated to v2i32
     15 ; CHECK-LABEL: test_v1i32_0:
     16 ; CHECK: xtn v0.2s, v0.2d
     17   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef> ; lane 0 = %in0[0], lane 1 undef
     18   %narrow = trunc <2 x i64> %wide to <2 x i32> ; per-lane i64 -> i32
     19   ret <2 x i32> %narrow
     20 }
     21 
     22 define <2 x i32> @test_v1i32_1(<1 x i64> %in0) { ; v1i64 placed in lane 1, then truncated to v2i32
     23 ; CHECK-LABEL: test_v1i32_1:
     24 ; CHECK: xtn v0.2s, v0.2d
     25 ; CHECK-NEXT: dup v0.2s, v0.s[0]
     26   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0> ; lane 1 = %in0[0], lane 0 undef
     27   %narrow = trunc <2 x i64> %wide to <2 x i32> ; per-lane i64 -> i32
     28   ret <2 x i32> %narrow
     29 }
     30 
     31 define <4 x i16> @test_v1i16_0(<1 x i64> %in0) { ; v1i64 placed in lane 0, then truncated to v4i16
     32 ; CHECK-LABEL: test_v1i16_0:
     33 ; CHECK-NOT: xtn
     34   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; lane 0 = %in0[0], lanes 1-3 undef
     35   %narrow = trunc <4 x i64> %wide to <4 x i16> ; per-lane i64 -> i16
     36   ret <4 x i16> %narrow
     37 }
     38 
     39 define <4 x i16> @test_v1i16_1(<1 x i64> %in0) { ; v1i64 placed in lane 2, then truncated to v4i16
     40 ; CHECK-LABEL: test_v1i16_1:
     41 ; CHECK-NOT: xtn
     42 ; CHECK: dup v0.4h, v0.h[0]
     43   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef> ; lane 2 = %in0[0], other lanes undef
     44   %narrow = trunc <4 x i64> %wide to <4 x i16> ; per-lane i64 -> i16
     45   ret <4 x i16> %narrow
     46 }
     47 
     48 define <8 x i8> @test_v1i8_0(<1 x i64> %in0) { ; v1i64 placed in lane 0, then truncated to v8i8
     49 ; CHECK-LABEL: test_v1i8_0:
     50 ; CHECK-NOT: xtn
     51   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = %in0[0], lanes 1-7 undef
     52   %narrow = trunc <8 x i64> %wide to <8 x i8> ; per-lane i64 -> i8
     53   ret <8 x i8> %narrow
     54 }
     55 
     56 define <8 x i8> @test_v1i8_1(<1 x i64> %in0) { ; v1i64 placed in lane 2, then truncated to v8i8
     57 ; CHECK-LABEL: test_v1i8_1:
     58 ; CHECK-NOT: xtn
     59 ; CHECK: dup v0.8b, v0.b[0]
     60   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 2 = %in0[0], other lanes undef
     61   %narrow = trunc <8 x i64> %wide to <8 x i8> ; per-lane i64 -> i8
     62   ret <8 x i8> %narrow
     63 }
     64 
     65 ; PR20777: v1i1 is also problematic, but we can't widen it, so we extract_elt
     66 ; the i64 out of the v1i64 operand, and truncate that scalar instead.
     67 
     68 define <1 x i1> @test_v1i1_0(<1 x i64> %in0) { ; v1i64 -> v1i1 trunc, result returned as a vector
     69 ; CHECK-LABEL: test_v1i1_0:
     70 ; CHECK: fmov w0, s0
     71   %bit = trunc <1 x i64> %in0 to <1 x i1> ; single-lane i64 -> i1
     72   ret <1 x i1> %bit
     73 }
     74 
     75 define i1 @test_v1i1_1(<1 x i64> %in0) { ; v1i64 -> v1i1 trunc, then lane 0 returned as a scalar i1
     76 ; CHECK-LABEL: test_v1i1_1:
     77 ; CHECK: fmov [[REG:w[0-9]+]], s0
     78 ; CHECK: and w0, [[REG]], #0x1
     79   %bit = trunc <1 x i64> %in0 to <1 x i1> ; single-lane i64 -> i1
     80   %elt = extractelement <1 x i1> %bit, i32 0 ; pull the only lane out as a scalar
     81   ret i1 %elt
     82 }
     83