Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s 
      2 
      3 ; This test checks that only a single js gets generated in the final code
      4 ; for lowering the CMOV pseudos that get created for this IR: two i32 selects
      5 ; CHECK-LABEL: foo1:
      6 ; CHECK: js
      7 ; CHECK-NOT: js
      8 define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind {
      9 entry:
     10   %cmp = icmp slt i32 %v1, 0                   ; signed test: is %v1 negative?
     11   %v2.v3 = select i1 %cmp, i32 %v2, i32 %v3    ; first select on %cmp
     12   %v1.v2 = select i1 %cmp, i32 %v1, i32 %v2    ; second select, same %cmp
     13   %sub = sub i32 %v1.v2, %v2.v3                ; consumes both select results
     14   ret i32 %sub
     15 }
     16 
     17 ; This test checks that only a single js gets generated in the final code
     18 ; for lowering the CMOV pseudos that get created for this IR. This makes
     19 ; sure the code for the lowering for opposite conditions gets tested.
     20 ; CHECK-LABEL: foo11:
     21 ; CHECK: js
     22 ; CHECK-NOT: js
     23 ; CHECK-NOT: jns
     24 define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind {
     25 entry:
     26   %cmp1 = icmp slt i32 %v1, 0                   ; %v1 < 0 (signed)
     27   %v2.v3 = select i1 %cmp1, i32 %v2, i32 %v3    ; select on %cmp1
     28   %cmp2 = icmp sge i32 %v1, 0                   ; %v1 >= 0: the opposite predicate on the same operands
     29   %v1.v2 = select i1 %cmp2, i32 %v1, i32 %v2    ; select on the inverted condition
     30   %sub = sub i32 %v1.v2, %v2.v3                 ; consumes both select results
     31   ret i32 %sub
     32 }
     33 
     34 ; This test checks that only a single js gets generated in the final code
     35 ; for lowering the CMOV pseudos that get created for this IR (i8 selects).
     36 ; CHECK-LABEL: foo2:
     37 ; CHECK: js
     38 ; CHECK-NOT: js
     39 define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind {
     40 entry:
     41   %cmp = icmp slt i8 %v1, 0                  ; signed test: is the i8 %v1 negative?
     42   %v2.v3 = select i1 %cmp, i8 %v2, i8 %v3    ; first i8 select on %cmp
     43   %v1.v2 = select i1 %cmp, i8 %v1, i8 %v2    ; second i8 select, same %cmp
     44   %t1 = sext i8 %v2.v3 to i32                ; sign-extend the first select result to i32
     45   %t2 = sext i8 %v1.v2 to i32                ; sign-extend the second select result to i32
     46   %sub = sub i32 %t1, %t2                    ; consumes both extended results
     47   ret i32 %sub
     48 }
     49 
     50 ; This test checks that only a single js gets generated in the final code
     51 ; for lowering the CMOV pseudos that get created for this IR (i16 selects).
     52 ; CHECK-LABEL: foo3:
     53 ; CHECK: js
     54 ; CHECK-NOT: js
     55 define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind {
     56 entry:
     57   %cmp = icmp slt i16 %v1, 0                   ; signed test: is the i16 %v1 negative?
     58   %v2.v3 = select i1 %cmp, i16 %v2, i16 %v3    ; first i16 select on %cmp
     59   %v1.v2 = select i1 %cmp, i16 %v1, i16 %v2    ; second i16 select, same %cmp
     60   %t1 = sext i16 %v2.v3 to i32                 ; sign-extend the first select result to i32
     61   %t2 = sext i16 %v1.v2 to i32                 ; sign-extend the second select result to i32
     62   %sub = sub i32 %t1, %t2                      ; consumes both extended results
     63   ret i32 %sub
     64 }
     65 
     66 ; This test checks that only a single js gets generated in the final code
     67 ; for lowering the CMOV pseudos that get created for this IR (float selects).
     68 ; CHECK-LABEL: foo4:
     69 ; CHECK: js
     70 ; CHECK-NOT: js
     71 define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind {
     72 entry:
     73   %cmp = icmp slt i32 %v1, 0                    ; integer condition drives float selects
     74   %t1 = select i1 %cmp, float %v2, float %v3    ; %v3 is the false value here...
     75   %t2 = select i1 %cmp, float %v3, float %v4    ; ...and the true value here
     76   %sub = fsub float %t1, %t2                    ; consumes both select results
     77   ret float %sub
     78 }
     79 
     80 ; This test checks that only a single je gets generated in the final code
     81 ; for lowering the CMOV pseudos that get created for this IR (double selects).
     82 ; CHECK-LABEL: foo5:
     83 ; CHECK: je
     84 ; CHECK-NOT: je
     85 define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind {
     86 entry:
     87   %cmp = icmp eq i32 %v1, 0                       ; equality test against zero
     88   %t1 = select i1 %cmp, double %v2, double %v3    ; %v3 is the false value here...
     89   %t2 = select i1 %cmp, double %v3, double %v4    ; ...and the true value here
     90   %sub = fsub double %t1, %t2                     ; consumes both select results
     91   ret double %sub
     92 }
     93 
     94 ; This test checks that only a single je gets generated in the final code
     95 ; for lowering the CMOV pseudos that get created for this IR (<4 x float>).
     96 ; CHECK-LABEL: foo6:
     97 ; CHECK: je
     98 ; CHECK-NOT: je
     99 define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind {
    100 entry:
    101   %cmp = icmp eq i32 %v1, 0                                 ; scalar i1 condition, not a per-lane mask
    102   %t1 = select i1 %cmp, <4 x float> %v2, <4 x float> %v3    ; whole-vector select on %cmp
    103   %t2 = select i1 %cmp, <4 x float> %v3, <4 x float> %v4    ; second whole-vector select, same %cmp
    104   %sub = fsub <4 x float> %t1, %t2                          ; consumes both select results
    105   ret <4 x float> %sub
    106 }
    107 
    108 ; This test checks that only a single je gets generated in the final code
    109 ; for lowering the CMOV pseudos that get created for this IR (<2 x double>).
    110 ; CHECK-LABEL: foo7:
    111 ; CHECK: je
    112 ; CHECK-NOT: je
    113 define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind {
    114 entry:
    115   %cmp = icmp eq i32 %v1, 0                                   ; scalar i1 condition, not a per-lane mask
    116   %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3    ; whole-vector select on %cmp
    117   %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4    ; second whole-vector select, same %cmp
    118   %sub = fsub <2 x double> %t1, %t2                           ; consumes both select results
    119   ret <2 x double> %sub
    120 }
    121 
    122 ; This test checks that only a single ja gets generated in the final code
    123 ; for lowering the CMOV pseudos that get created for this IR. This combines
    124 ; all the supported types together into one long string of ten selects based
    125 ; on the same condition; each result is stored through a pointer built on %dst.
    126 ; CHECK-LABEL: foo8:
    127 ; CHECK: ja
    128 ; CHECK-NOT: ja
    129 define void @foo8(i32 %v1,
    130                   i8 %v2, i8 %v3,      ; NOTE(review): %v2/%v3 are never referenced in the body - confirm intended
    131                   i16 %v12, i16 %v13,
    132                   i32 %v22, i32 %v23,
    133                   float %v32, float %v33,
    134                   double %v42, double %v43,
    135                   <4 x float> %v52, <4 x float> %v53,
    136                   <2 x double> %v62, <2 x double> %v63,
    137                   <8 x float> %v72, <8 x float> %v73,
    138                   <4 x double> %v82, <4 x double> %v83,
    139                   <16 x float> %v92, <16 x float> %v93,
    140                   <8 x double> %v102, <8 x double> %v103,
    141                   i8 * %dst) nounwind {
    142 entry:
    143   %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 2      ; %dst + 2 bytes
    144   %a11 = bitcast i8* %add.ptr11 to i16*
    145 
    146   %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4      ; %dst + 4 bytes
    147   %a21 = bitcast i8* %add.ptr21 to i32*
    148 
    149   %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8      ; %dst + 8 bytes
    150   %a31 = bitcast i8* %add.ptr31 to float*
    151 
    152   %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16     ; %dst + 16 bytes
    153   %a41 = bitcast i8* %add.ptr41 to double*
    154 
    155   %add.ptr51 = getelementptr inbounds i8, i8* %dst, i32 32     ; %dst + 32 bytes
    156   %a51 = bitcast i8* %add.ptr51 to <4 x float>*
    157 
    158   %add.ptr61 = getelementptr inbounds i8, i8* %dst, i32 48     ; %dst + 48 bytes
    159   %a61 = bitcast i8* %add.ptr61 to <2 x double>*
    160 
    161   %add.ptr71 = getelementptr inbounds i8, i8* %dst, i32 64     ; %dst + 64 bytes
    162   %a71 = bitcast i8* %add.ptr71 to <8 x float>*
    163 
    164   %add.ptr81 = getelementptr inbounds i8, i8* %dst, i32 128    ; %dst + 128 bytes
    165   %a81 = bitcast i8* %add.ptr81 to <4 x double>*
    166 
    167   %add.ptr91 = getelementptr inbounds i8, i8* %dst, i32 64     ; NOTE(review): same offset as %add.ptr71, so the later store via %a91 overlaps the one via %a71 - confirm intended
    168   %a91 = bitcast i8* %add.ptr91 to <16 x float>*
    169 
    170   %add.ptr101 = getelementptr inbounds i8, i8* %dst, i32 128   ; NOTE(review): same offset as %add.ptr81, so the later store via %a101 overlaps the one via %a81 - confirm intended
    171   %a101 = bitcast i8* %add.ptr101 to <8 x double>*
    172 
    173   ; These operations are necessary, because select of two single use loads
    174   ; ends up getting optimized into a select of two leas, followed by a
    175   ; single load of the selected address.
    176   %t13 = xor i16 %v13, 11                   ; each %tN3 below is the computed false operand of the matching select
    177   %t23 = xor i32 %v23, 1234
    178   %t33 = fadd float %v33, %v32
    179   %t43 = fadd double %v43, %v42
    180   %t53 = fadd <4 x float> %v53, %v52
    181   %t63 = fadd <2 x double> %v63, %v62
    182   %t73 = fsub <8 x float> %v73, %v72
    183   %t83 = fsub <4 x double> %v83, %v82
    184   %t93 = fsub <16 x float> %v93, %v92
    185   %t103 = fsub <8 x double> %v103, %v102
    186 
    187   %cmp = icmp ugt i32 %v1, 31               ; single unsigned condition shared by all ten selects
    188   %t11 = select i1 %cmp, i16 %v12, i16 %t13
    189   %t21 = select i1 %cmp, i32 %v22, i32 %t23
    190   %t31 = select i1 %cmp, float %v32, float %t33
    191   %t41 = select i1 %cmp, double %v42, double %t43
    192   %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53
    193   %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63
    194   %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73
    195   %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83
    196   %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93
    197   %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103
    198 
    199   store i16 %t11, i16* %a11, align 2        ; every select result is stored, giving each select a use
    200   store i32 %t21, i32* %a21, align 4
    201   store float %t31, float* %a31, align 4
    202   store double %t41, double* %a41, align 8
    203   store <4 x float> %t51, <4 x float>* %a51, align 16
    204   store <2 x double> %t61, <2 x double>* %a61, align 16
    205   store <8 x float> %t71, <8 x float>* %a71, align 32
    206   store <4 x double> %t81, <4 x double>* %a81, align 32
    207   store <16 x float> %t91, <16 x float>* %a91, align 32
    208   store <8 x double> %t101, <8 x double>* %a101, align 32
    209 
    210   ret void
    211 }
    212 
    213 ; This test checks that only a single ja gets generated in the final code
    214 ; for lowering the CMOV pseudos that get created for this IR, in which all
    215 ; four selects are based on the same condition.
    216 ; Contrary to my expectations, this doesn't exercise the code for
    217 ; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1.  Instead the selects all
    218 ; get lowered into vector length number of selects, which all eventually turn
    219 ; into a huge number of CMOV_GR8, which are all contiguous, so the optimization
    220 ; kicks in as long as CMOV_GR8 is supported. I couldn't find a way to get
    221 ; CMOV_V*I1 pseudo-opcodes to get generated. If a way exists to get CMOV_V*I1
    222 ; pseudo-opcodes to be generated, this test should be replaced with one that
    223 ; tests those opcodes.
    224 ;
    225 ; CHECK-LABEL: foo9:
    226 ; CHECK: ja
    227 ; CHECK-NOT: ja
    228 define void @foo9(i32 %v1,
    229                   <8 x i1> %v12, <8 x i1> %v13,
    230                   <16 x i1> %v22, <16 x i1> %v23,
    231                   <32 x i1> %v32, <32 x i1> %v33,
    232                   <64 x i1> %v42, <64 x i1> %v43,
    233                   i8 * %dst) nounwind {
    234 entry:
    235   %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 0     ; %dst + 0 bytes
    236   %a11 = bitcast i8* %add.ptr11 to <8 x i1>*
    237 
    238   %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4     ; %dst + 4 bytes
    239   %a21 = bitcast i8* %add.ptr21 to <16 x i1>*
    240 
    241   %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8     ; %dst + 8 bytes
    242   %a31 = bitcast i8* %add.ptr31 to <32 x i1>*
    243 
    244   %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16    ; %dst + 16 bytes
    245   %a41 = bitcast i8* %add.ptr41 to <64 x i1>*
    246 
    247   ; These operations are necessary, because select of two single use loads
    248   ; ends up getting optimized into a select of two leas, followed by a
    249   ; single load of the selected address.
    250   %t13 = xor <8 x i1> %v13, %v12            ; each %tN3 below is the computed false operand of the matching select
    251   %t23 = xor <16 x i1> %v23, %v22
    252   %t33 = xor <32 x i1> %v33, %v32
    253   %t43 = xor <64 x i1> %v43, %v42
    254 
    255   %cmp = icmp ugt i32 %v1, 31               ; single unsigned condition shared by all four selects
    256   %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13
    257   %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23
    258   %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33
    259   %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43
    260 
    261   store <8 x i1> %t11, <8 x i1>* %a11, align 16     ; every select result is stored, giving each select a use
    262   store <16 x i1> %t21, <16 x i1>* %a21, align 4
    263   store <32 x i1> %t31, <32 x i1>* %a31, align 8
    264   store <64 x i1> %t41, <64 x i1>* %a41, align 16
    265 
    266   ret void
    267 }
    268