Home | History | Annotate | Download | only in ARM
      1 ; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
      2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      3 ; Make sure that the ARM backend with NEON handles vselect.
      4 
      5 define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
        ; Lane-wise signed maximum of %a and %b, expressed as icmp sgt + select,
        ; with the result stored through %m.
        ;
        ; The label directive below anchors the following checks to this function's
        ; output, matching how every other function in this file is checked.
        ; The checks then require a vcgt.s32 whose destination register is reused
        ; as the first operand of a vbsl that takes the same two source registers.
        ; CHECK-LABEL: vmax_v4i32:
      6 ; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
      7 ; CHECK: vbsl [[QR]], [[Q1]], [[Q2]]
      8     %cmpres = icmp sgt <4 x i32> %a, %b
      9     %maxres = select <4 x i1> %cmpres, <4 x i32> %a,  <4 x i32> %b
     10     store <4 x i32> %maxres, <4 x i32>* %m
     11     ret void
     12 }
     13 
     14 ; The cost model was adjusted for the following selects. If their code
     15 ; lowering is improved, the expected costs below must be adjusted as well.
     16 %T0_10 = type <16 x i16>
     17 %T1_10 = type <16 x i1>
        ; func_blend10: loads two <16 x i16> vectors, compares them lane-wise with a
        ; signed less-than, selects %v0 where the compare is true and %v1 otherwise
        ; (i.e. the lane-wise signed minimum), and stores the result to %storeaddr.
        ; %blend is declared but never referenced in the body.
        ; Under the CHECK prefix (llc run) two vbsl matches are required after the
        ; label; under the COST prefix (cost-model run) the select must be reported
        ; with a cost of 40.
     18 ; CHECK-LABEL: func_blend10:
     19 define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
     20                            %T1_10* %blend, %T0_10* %storeaddr) {
     21   %v0 = load %T0_10* %loadaddr
     22   %v1 = load %T0_10* %loadaddr2
     23   %c = icmp slt %T0_10 %v0, %v1  ; <16 x i1> mask, true where v0 < v1 (signed)
     24 ; CHECK: vbsl
     25 ; CHECK: vbsl
     26 ; COST: func_blend10
     27 ; COST: cost of 40 {{.*}} select
     28   %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1  ; the instruction whose cost is checked
     29   store %T0_10 %r, %T0_10* %storeaddr
     30   ret void
     31 }
     32 %T0_14 = type <8 x i32>
     33 %T1_14 = type <8 x i1>
        ; func_blend14: same shape as the other func_blend tests, on <8 x i32>.
        ; Loads two vectors, computes a lane-wise signed less-than mask, selects %v0
        ; where the mask is true and %v1 otherwise, and stores the result to
        ; %storeaddr. %blend is declared but never referenced in the body.
        ; Under the CHECK prefix two vbsl matches are required after the label;
        ; under the COST prefix the select must be reported with a cost of 41.
     34 ; CHECK-LABEL: func_blend14:
     35 define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
     36                            %T1_14* %blend, %T0_14* %storeaddr) {
     37   %v0 = load %T0_14* %loadaddr
     38   %v1 = load %T0_14* %loadaddr2
     39   %c = icmp slt %T0_14 %v0, %v1  ; <8 x i1> mask, true where v0 < v1 (signed)
     40 ; CHECK: vbsl
     41 ; CHECK: vbsl
     42 ; COST: func_blend14
     43 ; COST: cost of 41 {{.*}} select
     44   %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1  ; the instruction whose cost is checked
     45   store %T0_14 %r, %T0_14* %storeaddr
     46   ret void
     47 }
     48 %T0_15 = type <16 x i32>
     49 %T1_15 = type <16 x i1>
        ; func_blend15: same shape as the other func_blend tests, on <16 x i32>.
        ; Loads two vectors, computes a lane-wise signed less-than mask, selects %v0
        ; where the mask is true and %v1 otherwise, and stores the result to
        ; %storeaddr. %blend is declared but never referenced in the body.
        ; Under the CHECK prefix two vbsl matches are required after the label;
        ; under the COST prefix the select must be reported with a cost of 82.
        ; NOTE(review): <16 x i32> is 512 bits wide, yet only two vbsl matches are
        ; required here, whereas func_blend19 (<8 x i64>, also 512 bits) requires
        ; four. Confirm whether four were intended for this function as well.
     50 ; CHECK-LABEL: func_blend15:
     51 define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
     52                            %T1_15* %blend, %T0_15* %storeaddr) {
     53 ; CHECK: vbsl
     54 ; CHECK: vbsl
     55   %v0 = load %T0_15* %loadaddr
     56   %v1 = load %T0_15* %loadaddr2
     57   %c = icmp slt %T0_15 %v0, %v1  ; <16 x i1> mask, true where v0 < v1 (signed)
     58 ; COST: func_blend15
     59 ; COST: cost of 82 {{.*}} select
     60   %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1  ; the instruction whose cost is checked
     61   store %T0_15 %r, %T0_15* %storeaddr
     62   ret void
     63 }
     64 %T0_18 = type <4 x i64>
     65 %T1_18 = type <4 x i1>
        ; func_blend18: same shape as the other func_blend tests, on <4 x i64>.
        ; Loads two vectors, computes a lane-wise signed less-than mask, selects %v0
        ; where the mask is true and %v1 otherwise, and stores the result to
        ; %storeaddr. %blend is declared but never referenced in the body.
        ; Under the CHECK prefix two vbsl matches are required after the label;
        ; under the COST prefix the select must be reported with a cost of 19.
     66 ; CHECK-LABEL: func_blend18:
     67 define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
     68                            %T1_18* %blend, %T0_18* %storeaddr) {
     69 ; CHECK: vbsl
     70 ; CHECK: vbsl
     71   %v0 = load %T0_18* %loadaddr
     72   %v1 = load %T0_18* %loadaddr2
     73   %c = icmp slt %T0_18 %v0, %v1  ; <4 x i1> mask, true where v0 < v1 (signed)
     74 ; COST: func_blend18
     75 ; COST: cost of 19 {{.*}} select
     76   %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1  ; the instruction whose cost is checked
     77   store %T0_18 %r, %T0_18* %storeaddr
     78   ret void
     79 }
     80 %T0_19 = type <8 x i64>
     81 %T1_19 = type <8 x i1>
        ; func_blend19: same shape as the other func_blend tests, on <8 x i64>.
        ; Loads two vectors, computes a lane-wise signed less-than mask, selects %v0
        ; where the mask is true and %v1 otherwise, and stores the result to
        ; %storeaddr. %blend is declared but never referenced in the body.
        ; Under the CHECK prefix four vbsl matches are required after the label;
        ; under the COST prefix the select must be reported with a cost of 50.
     82 ; CHECK-LABEL: func_blend19:
     83 define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
     84                            %T1_19* %blend, %T0_19* %storeaddr) {
     85 ; CHECK: vbsl
     86 ; CHECK: vbsl
     87 ; CHECK: vbsl
     88 ; CHECK: vbsl
     89   %v0 = load %T0_19* %loadaddr
     90   %v1 = load %T0_19* %loadaddr2
     91   %c = icmp slt %T0_19 %v0, %v1  ; <8 x i1> mask, true where v0 < v1 (signed)
     92 ; COST: func_blend19
     93 ; COST: cost of 50 {{.*}} select
     94   %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1  ; the instruction whose cost is checked
     95   store %T0_19 %r, %T0_19* %storeaddr
     96   ret void
     97 }
     98 %T0_20 = type <16 x i64>
     99 %T1_20 = type <16 x i1>
        ; func_blend20: same shape as the other func_blend tests, on <16 x i64>.
        ; Loads two vectors, computes a lane-wise signed less-than mask, selects %v0
        ; where the mask is true and %v1 otherwise, and stores the result to
        ; %storeaddr. %blend is declared but never referenced in the body.
        ; Under the CHECK prefix eight vbsl matches are required after the label;
        ; under the COST prefix the select must be reported with a cost of 100.
    100 ; CHECK-LABEL: func_blend20:
    101 define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
    102                            %T1_20* %blend, %T0_20* %storeaddr) {
    103 ; CHECK: vbsl
    104 ; CHECK: vbsl
    105 ; CHECK: vbsl
    106 ; CHECK: vbsl
    107 ; CHECK: vbsl
    108 ; CHECK: vbsl
    109 ; CHECK: vbsl
    110 ; CHECK: vbsl
    111   %v0 = load %T0_20* %loadaddr
    112   %v1 = load %T0_20* %loadaddr2
    113   %c = icmp slt %T0_20 %v0, %v1  ; <16 x i1> mask, true where v0 < v1 (signed)
    114 ; COST: func_blend20
    115 ; COST: cost of 100 {{.*}} select
    116   %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1  ; the instruction whose cost is checked
    117   store %T0_20 %r, %T0_20* %storeaddr
    118   ret void
    119 }
    120