Home | History | Annotate | Download | only in PowerPC
      1 ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
      2 
      3 ; Test swap removal when a vector splat must be adjusted to make it legal.
      4 ;
      5 
      6 ; LH: 2016-11-17
      7 ;   Updated align attribute from 16 to 8 to keep the swap instruction tests.
      8 ;   Changes have been made on little-endian to use lvx and stvx
      9 ;   instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
     10 ;   aligned vectors with elements up to 4 bytes.
     11 
     12 ; Test generated from following C code:
     13 ;
     14 ; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
     15 ; vector char vcr;
     16 ; vector short vs = {0, 1, 2, 3, 4, 5, 6, 7};
     17 ; vector short vsr;
     18 ; vector int vi = {0, 1, 2, 3};
     19 ; vector int vir;
     20 ;
     21 ; void cfoo ()
     22 ; {
     23 ;   vcr = (vector char){vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5],
     24 ;                       vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5]};
     25 ; }
     26 ;
     27 ; void sfoo ()
     28 ; {
     29 ;   vsr = (vector short){vs[6], vs[6], vs[6], vs[6],
     30 ;                        vs[6], vs[6], vs[6], vs[6]};
     31 ; }
     32 ;
     33 ; void ifoo ()
     34 ; {
     35 ;   vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
     36 ; }
     37 
     38 @vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 8
     39 @vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 8
     40 @vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 8
     41 @vcr = common global <16 x i8> zeroinitializer, align 8
     42 @vsr = common global <8 x i16> zeroinitializer, align 8
     43 @vir = common global <4 x i32> zeroinitializer, align 8
     44 
     45 ; Function Attrs: nounwind
     46 define void @cfoo() {
     47 entry:
     48   %0 = load <16 x i8>, <16 x i8>* @vc, align 8
     49   %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
     50   store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 8
     51   ret void
     52 }
     53 
     54 ; Function Attrs: nounwind
     55 define void @sfoo() {
     56 entry:
     57   %0 = load <8 x i16>, <8 x i16>* @vs, align 8
     58   %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
     59   store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 8
     60   ret void
     61 }
     62 
     63 ; Function Attrs: nounwind
     64 define void @ifoo() {
     65 entry:
     66   %0 = load <4 x i32>, <4 x i32>* @vi, align 8
     67   %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
     68   store <4 x i32> %vecinit6, <4 x i32>* @vir, align 8
     69   ret void
     70 }
     71 
     72 ; Justification:
     73 ;  Byte splat of element 5 (BE) becomes element 15-5 = 10 (LE)
     74 ;  which becomes (10+8)%16 = 2 (LE swapped).
     75 ;
     76 ;  Halfword splat of element 6 (BE) becomes element 7-6 = 1 (LE)
     77 ;  which becomes (1+4)%8 = 5 (LE swapped).
     78 ;
     79 ;  Word splat of element 1 (BE) becomes element 3-1 = 2 (LE)
     80 ;  which becomes (2+2)%4 = 0 (LE swapped).
     81 
     82 ; CHECK-NOT: xxpermdi
     83 ; CHECK-NOT: xxswapd
     84 
     85 ; CHECK-LABEL: @cfoo
     86 ; CHECK: lxvd2x
     87 ; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 2
     88 ; CHECK: stxvd2x
     89 
     90 ; CHECK-LABEL: @sfoo
     91 ; CHECK: lxvd2x
     92 ; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 5
     93 ; CHECK: stxvd2x
     94 
     95 ; CHECK-LABEL: @ifoo
     96 ; CHECK: lxvd2x
     97 ; CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0
     98 ; CHECK: stxvd2x
     99