Home | History | Annotate | Download | only in llvm2ice_tests
      1 ; Tests some optimized shufflevector lowerings. This list is by no means
      2 ; exhaustive; it is only a **basic** smoke test. The vector_ops crosstest has
      3 ; a broader range of test cases.
      4 
      5 ; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble -a -O2 \
      6 ; RUN:     --allow-externally-defined-symbols | FileCheck %s --check-prefix=X86
      7 
      8 ; RUN: %if --need=target_MIPS32 --need=allow_dump \
      9 ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target \
     10 ; RUN:   mips32 -i %s --args -O2 -allow-externally-defined-symbols \
     11 ; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
     12 ; RUN:   --command FileCheck --check-prefix MIPS32 %s
     13 
     14 declare void @useV4I32(<4 x i32> %t); declared only (no body in this test); called once per shuffled vector below
     15 
     16 define internal void @shuffleV4I32(<4 x i32> %a, <4 x i32> %b) {
     17 ; X86-LABEL: shuffleV4I32
          ; Builds three <4 x i32> vectors lane by lane from elements of %a and
          ; %b and passes each one to @useV4I32. After each insertelement chain,
          ; the x86-32 check lines name the instruction(s) expected in the
          ; disassembly for that lane pattern.

          ; Pull out every lane of %a.
     18   %a_0 = extractelement <4 x i32> %a, i32 0
     19   %a_1 = extractelement <4 x i32> %a, i32 1
     20   %a_2 = extractelement <4 x i32> %a, i32 2
     21   %a_3 = extractelement <4 x i32> %a, i32 3
     22 
          ; Pull out every lane of %b.
     23   %b_0 = extractelement <4 x i32> %b, i32 0
     24   %b_1 = extractelement <4 x i32> %b, i32 1
     25   %b_2 = extractelement <4 x i32> %b, i32 2
     26   %b_3 = extractelement <4 x i32> %b, i32 3
     27 
          ; t0 = <a0, b0, a1, b1>: lanes 0 and 1 of %a and %b interleaved.
          ; Expected to appear as a punpckldq.
     28   %t0_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
     29   %t0_1 = insertelement <4 x i32> %t0_0, i32 %b_0, i32 1
     30   %t0_2 = insertelement <4 x i32> %t0_1, i32 %a_1, i32 2
     31   %t0   = insertelement <4 x i32> %t0_2, i32 %b_1, i32 3
     32 ; X86: punpckldq {{.*}}
     33 
     34   call void @useV4I32(<4 x i32> %t0)
     35 ; X86: call
     36 
          ; t1 = <a0, b1, b1, a0>.
          ; Expected as a shufps with immediate 0x10 whose first operand register
          ; is captured as T, followed by a pshufd with immediate 0x28 that reads
          ; that same register.
     37   %t1_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
     38   %t1_1 = insertelement <4 x i32> %t1_0, i32 %b_1, i32 1
     39   %t1_2 = insertelement <4 x i32> %t1_1, i32 %b_1, i32 2
     40   %t1   = insertelement <4 x i32> %t1_2, i32 %a_0, i32 3
     41 ; X86: shufps [[T:xmm[0-9]+]],{{.*}},0x10
     42 ; X86: pshufd {{.*}},[[T]],0x28
     43 
     44   call void @useV4I32(<4 x i32> %t1)
     45 ; X86: call
     46 
          ; t2 = <a0, b3, a2, b2>.
          ; Expected as three shufps instructions with immediates 0x30, 0x22 and
          ; 0x88, in that order.
     47   %t2_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
     48   %t2_1 = insertelement <4 x i32> %t2_0, i32 %b_3, i32 1
     49   %t2_2 = insertelement <4 x i32> %t2_1, i32 %a_2, i32 2
     50   %t2   = insertelement <4 x i32> %t2_2, i32 %b_2, i32 3
     51 ; X86: shufps {{.*}},0x30
     52 ; X86: shufps {{.*}},0x22
     53 ; X86: shufps {{.*}},0x88
     54 
     55   call void @useV4I32(<4 x i32> %t2)
     56 ; X86: call
     57 
     58   ret void
     59 }
     60 ; MIPS32-LABEL: shuffleV4I32
        ; The mips32 checks below only pin down the overall shape of the output:
        ; three groups of four move instructions, each group followed by a jal.
        ; NOTE(review): presumably one group per @useV4I32 call -- confirm.
     61 ; MIPS32: 	move
     62 ; MIPS32: 	move
     63 ; MIPS32: 	move
     64 ; MIPS32: 	move
     65 ; MIPS32: 	jal
     66 ; MIPS32: 	move
     67 ; MIPS32: 	move
     68 ; MIPS32: 	move
     69 ; MIPS32: 	move
     70 ; MIPS32: 	jal
     71 ; MIPS32: 	move
     72 ; MIPS32: 	move
     73 ; MIPS32: 	move
     74 ; MIPS32: 	move
     75 ; MIPS32: 	jal
     75 ; MIPS32: 	jal
     76