Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
      2 
      3 ; Check propagation of optional IR flags (PR20802). For a flag to
      4 ; propagate from scalar instructions to their vector replacement,
      5 ; *all* scalar instructions must have the flag.
      6 
      7 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
      8 target triple = "x86_64-unknown-unknown"
      9 
     10 ; CHECK-LABEL: @exact(
     11 ; CHECK: lshr exact <4 x i32>
     12 define void @exact(i32* %x) {
          ; Positive case for `exact`: four adjacent i32 elements of %x are
          ; loaded, shifted right by 1, and stored back to the same addresses.
     13   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
     14   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
     15   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
     16   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
     17 
     18   %load1 = load i32, i32* %idx1, align 4
     19   %load2 = load i32, i32* %idx2, align 4
     20   %load3 = load i32, i32* %idx3, align 4
     21   %load4 = load i32, i32* %idx4, align 4
     22 
          ; All four scalar shifts carry `exact`, so the CHECK line for this
          ; function expects `exact` on the <4 x i32> lshr.
     23   %op1 = lshr exact i32 %load1, 1
     24   %op2 = lshr exact i32 %load2, 1
     25   %op3 = lshr exact i32 %load3, 1
     26   %op4 = lshr exact i32 %load4, 1
     27 
     28   store i32 %op1, i32* %idx1, align 4
     29   store i32 %op2, i32* %idx2, align 4
     30   store i32 %op3, i32* %idx3, align 4
     31   store i32 %op4, i32* %idx4, align 4
     32 
     33   ret void
     34 }
     35 
     36 ; CHECK-LABEL: @not_exact(
     37 ; CHECK: lshr <4 x i32>
     38 define void @not_exact(i32* %x) {
          ; Negative case for `exact`: same load / lshr-by-1 / store pattern
          ; over four adjacent i32 elements of %x, but one shift lacks the flag.
     39   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
     40   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
     41   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
     42   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
     43 
     44   %load1 = load i32, i32* %idx1, align 4
     45   %load2 = load i32, i32* %idx2, align 4
     46   %load3 = load i32, i32* %idx3, align 4
     47   %load4 = load i32, i32* %idx4, align 4
     48 
          ; %op2 is a plain lshr (no `exact`), so the CHECK line for this
          ; function expects an unflagged <4 x i32> lshr.
     49   %op1 = lshr exact i32 %load1, 1
     50   %op2 = lshr i32 %load2, 1
     51   %op3 = lshr exact i32 %load3, 1
     52   %op4 = lshr exact i32 %load4, 1
     53 
     54   store i32 %op1, i32* %idx1, align 4
     55   store i32 %op2, i32* %idx2, align 4
     56   store i32 %op3, i32* %idx3, align 4
     57   store i32 %op4, i32* %idx4, align 4
     58 
     59   ret void
     60 }
     61 
     62 ; CHECK-LABEL: @nsw(
     63 ; CHECK: add nsw <4 x i32>
     64 define void @nsw(i32* %x) {
          ; Positive case for `nsw`: four adjacent i32 elements of %x are
          ; loaded, incremented by 1, and stored back to the same addresses.
     65   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
     66   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
     67   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
     68   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
     69 
     70   %load1 = load i32, i32* %idx1, align 4
     71   %load2 = load i32, i32* %idx2, align 4
     72   %load3 = load i32, i32* %idx3, align 4
     73   %load4 = load i32, i32* %idx4, align 4
     74 
          ; All four scalar adds carry `nsw`, so the CHECK line for this
          ; function expects `nsw` on the <4 x i32> add.
     75   %op1 = add nsw i32 %load1, 1
     76   %op2 = add nsw i32 %load2, 1
     77   %op3 = add nsw i32 %load3, 1
     78   %op4 = add nsw i32 %load4, 1
     79 
     80   store i32 %op1, i32* %idx1, align 4
     81   store i32 %op2, i32* %idx2, align 4
     82   store i32 %op3, i32* %idx3, align 4
     83   store i32 %op4, i32* %idx4, align 4
     84 
     85   ret void
     86 }
     87 
     88 ; CHECK-LABEL: @not_nsw(
     89 ; CHECK: add <4 x i32>
     90 define void @not_nsw(i32* %x) {
          ; Negative case for `nsw`: same load / add-1 / store pattern over
          ; four adjacent i32 elements of %x, but one add lacks the flag.
     91   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
     92   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
     93   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
     94   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
     95 
     96   %load1 = load i32, i32* %idx1, align 4
     97   %load2 = load i32, i32* %idx2, align 4
     98   %load3 = load i32, i32* %idx3, align 4
     99   %load4 = load i32, i32* %idx4, align 4
    100 
          ; %op4 is a plain add (no `nsw`), so the CHECK line for this
          ; function expects an unflagged <4 x i32> add.
    101   %op1 = add nsw i32 %load1, 1
    102   %op2 = add nsw i32 %load2, 1
    103   %op3 = add nsw i32 %load3, 1
    104   %op4 = add i32 %load4, 1
    105 
    106   store i32 %op1, i32* %idx1, align 4
    107   store i32 %op2, i32* %idx2, align 4
    108   store i32 %op3, i32* %idx3, align 4
    109   store i32 %op4, i32* %idx4, align 4
    110 
    111   ret void
    112 }
    113 
    114 ; CHECK-LABEL: @nuw(
    115 ; CHECK: add nuw <4 x i32>
    116 define void @nuw(i32* %x) {
          ; Positive case for `nuw`: four adjacent i32 elements of %x are
          ; loaded, incremented by 1, and stored back to the same addresses.
    117   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    118   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    119   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
    120   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
    121 
    122   %load1 = load i32, i32* %idx1, align 4
    123   %load2 = load i32, i32* %idx2, align 4
    124   %load3 = load i32, i32* %idx3, align 4
    125   %load4 = load i32, i32* %idx4, align 4
    126 
          ; All four scalar adds carry `nuw`, so the CHECK line for this
          ; function expects `nuw` on the <4 x i32> add.
    127   %op1 = add nuw i32 %load1, 1
    128   %op2 = add nuw i32 %load2, 1
    129   %op3 = add nuw i32 %load3, 1
    130   %op4 = add nuw i32 %load4, 1
    131 
    132   store i32 %op1, i32* %idx1, align 4
    133   store i32 %op2, i32* %idx2, align 4
    134   store i32 %op3, i32* %idx3, align 4
    135   store i32 %op4, i32* %idx4, align 4
    136 
    137   ret void
    138 }
    139  
    140 ; CHECK-LABEL: @not_nuw(
    141 ; CHECK: add <4 x i32>
    142 define void @not_nuw(i32* %x) {
          ; Negative case for `nuw`: same load / add-1 / store pattern over
          ; four adjacent i32 elements of %x, but two adds lack the flag.
    143   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    144   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    145   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
    146   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
    147 
    148   %load1 = load i32, i32* %idx1, align 4
    149   %load2 = load i32, i32* %idx2, align 4
    150   %load3 = load i32, i32* %idx3, align 4
    151   %load4 = load i32, i32* %idx4, align 4
    152 
          ; %op2 and %op3 are plain adds (no `nuw`), so the CHECK line for
          ; this function expects an unflagged <4 x i32> add.
    153   %op1 = add nuw i32 %load1, 1
    154   %op2 = add i32 %load2, 1
    155   %op3 = add i32 %load3, 1
    156   %op4 = add nuw i32 %load4, 1
    157 
    158   store i32 %op1, i32* %idx1, align 4
    159   store i32 %op2, i32* %idx2, align 4
    160   store i32 %op3, i32* %idx3, align 4
    161   store i32 %op4, i32* %idx4, align 4
    162 
    163   ret void
    164 }
    165  
    166 ; CHECK-LABEL: @nnan(
    167 ; CHECK: fadd nnan <4 x float>
    168 define void @nnan(float* %x) {
          ; Positive case for `nnan`: four adjacent float elements of %x are
          ; loaded, have 1.0 added, and are stored back to the same addresses.
    169   %idx1 = getelementptr inbounds float, float* %x, i64 0
    170   %idx2 = getelementptr inbounds float, float* %x, i64 1
    171   %idx3 = getelementptr inbounds float, float* %x, i64 2
    172   %idx4 = getelementptr inbounds float, float* %x, i64 3
    173 
    174   %load1 = load float, float* %idx1, align 4
    175   %load2 = load float, float* %idx2, align 4
    176   %load3 = load float, float* %idx3, align 4
    177   %load4 = load float, float* %idx4, align 4
    178 
          ; Each fadd has a different flag set, but `nnan` is written on all
          ; four; the CHECK line for this function expects only `nnan` on the
          ; <4 x float> fadd.
    179   %op1 = fadd fast nnan float %load1, 1.0
    180   %op2 = fadd nnan ninf float %load2, 1.0
    181   %op3 = fadd nsz nnan float %load3, 1.0
    182   %op4 = fadd arcp nnan float %load4, 1.0
    183 
    184   store float %op1, float* %idx1, align 4
    185   store float %op2, float* %idx2, align 4
    186   store float %op3, float* %idx3, align 4
    187   store float %op4, float* %idx4, align 4
    188 
    189   ret void
    190 }
    191  
    192 ; CHECK-LABEL: @not_nnan(
    193 ; CHECK: fadd <4 x float>
    194 define void @not_nnan(float* %x) {
          ; Negative case for fast-math flags: same load / fadd-1.0 / store
          ; pattern over four adjacent float elements of %x, with no flag
          ; shared by all four fadds.
    195   %idx1 = getelementptr inbounds float, float* %x, i64 0
    196   %idx2 = getelementptr inbounds float, float* %x, i64 1
    197   %idx3 = getelementptr inbounds float, float* %x, i64 2
    198   %idx4 = getelementptr inbounds float, float* %x, i64 3
    199 
    200   %load1 = load float, float* %idx1, align 4
    201   %load2 = load float, float* %idx2, align 4
    202   %load3 = load float, float* %idx3, align 4
    203   %load4 = load float, float* %idx4, align 4
    204 
          ; Each fadd carries a different single flag (nnan / ninf / nsz /
          ; arcp), so the CHECK line for this function expects an unflagged
          ; <4 x float> fadd.
    205   %op1 = fadd nnan float %load1, 1.0
    206   %op2 = fadd ninf float %load2, 1.0
    207   %op3 = fadd nsz float %load3, 1.0
    208   %op4 = fadd arcp float %load4, 1.0
    209 
    210   store float %op1, float* %idx1, align 4
    211   store float %op2, float* %idx2, align 4
    212   store float %op3, float* %idx3, align 4
    213   store float %op4, float* %idx4, align 4
    214 
    215   ret void
    216 }
    217  
    218 ; CHECK-LABEL: @only_fast(
    219 ; CHECK: fadd fast <4 x float>
    220 define void @only_fast(float* %x) {
          ; Case where every fadd is `fast`: four adjacent float elements of
          ; %x are loaded, have 1.0 added, and are stored back in place.
    221   %idx1 = getelementptr inbounds float, float* %x, i64 0
    222   %idx2 = getelementptr inbounds float, float* %x, i64 1
    223   %idx3 = getelementptr inbounds float, float* %x, i64 2
    224   %idx4 = getelementptr inbounds float, float* %x, i64 3
    225 
    226   %load1 = load float, float* %idx1, align 4
    227   %load2 = load float, float* %idx2, align 4
    228   %load3 = load float, float* %idx3, align 4
    229   %load4 = load float, float* %idx4, align 4
    230 
          ; `fast` appears on all four fadds alongside assorted individual
          ; flags, in varying order; the CHECK line for this function expects
          ; the <4 x float> fadd to be printed as just `fast`.
    231   %op1 = fadd fast nnan float %load1, 1.0
    232   %op2 = fadd fast nnan ninf float %load2, 1.0
    233   %op3 = fadd fast nsz nnan float %load3, 1.0
    234   %op4 = fadd arcp nnan fast float %load4, 1.0
    235 
    236   store float %op1, float* %idx1, align 4
    237   store float %op2, float* %idx2, align 4
    238   store float %op3, float* %idx3, align 4
    239   store float %op4, float* %idx4, align 4
    240 
    241   ret void
    242 }
    243  
    244 ; CHECK-LABEL: @only_arcp(
    245 ; CHECK: fadd arcp <4 x float>
    246 define void @only_arcp(float* %x) {
          ; Mixed `fast` / `arcp` case: four adjacent float elements of %x are
          ; loaded, have 1.0 added, and are stored back in place.
    247   %idx1 = getelementptr inbounds float, float* %x, i64 0
    248   %idx2 = getelementptr inbounds float, float* %x, i64 1
    249   %idx3 = getelementptr inbounds float, float* %x, i64 2
    250   %idx4 = getelementptr inbounds float, float* %x, i64 3
    251 
    252   %load1 = load float, float* %idx1, align 4
    253   %load2 = load float, float* %idx2, align 4
    254   %load3 = load float, float* %idx3, align 4
    255   %load4 = load float, float* %idx4, align 4
    256 
          ; Three fadds are `fast` and %op4 is only `arcp`; the CHECK line for
          ; this function expects `arcp` on the <4 x float> fadd.
          ; NOTE(review): this presumably relies on `fast` implying `arcp` --
          ; confirm against the LangRef fast-math flag definitions.
    257   %op1 = fadd fast float %load1, 1.0
    258   %op2 = fadd fast float %load2, 1.0
    259   %op3 = fadd fast float %load3, 1.0
    260   %op4 = fadd arcp float %load4, 1.0
    261 
    262   store float %op1, float* %idx1, align 4
    263   store float %op2, float* %idx2, align 4
    264   store float %op3, float* %idx3, align 4
    265   store float %op4, float* %idx4, align 4
    266 
    267   ret void
    268 }
    269 
    270 ; CHECK-LABEL: @addsub_all_nsw(
    271 ; CHECK: add nsw <4 x i32>
    272 ; CHECK: sub nsw <4 x i32>
    273 define void @addsub_all_nsw(i32* %x) {
          ; Alternating add/sub case with `nsw` everywhere: four adjacent i32
          ; elements of %x are loaded, adjusted by 1, and stored back in place.
    274   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    275   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    276   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
    277   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
    278 
    279   %load1 = load i32, i32* %idx1, align 4
    280   %load2 = load i32, i32* %idx2, align 4
    281   %load3 = load i32, i32* %idx3, align 4
    282   %load4 = load i32, i32* %idx4, align 4
    283 
          ; Lanes 1 and 3 add, lanes 2 and 4 subtract; every op carries `nsw`,
          ; so the CHECK lines for this function expect `nsw` on both the
          ; <4 x i32> add and the <4 x i32> sub.
    284   %op1 = add nsw i32 %load1, 1
    285   %op2 = sub nsw i32 %load2, 1
    286   %op3 = add nsw i32 %load3, 1
    287   %op4 = sub nsw i32 %load4, 1
    288 
    289   store i32 %op1, i32* %idx1, align 4
    290   store i32 %op2, i32* %idx2, align 4
    291   store i32 %op3, i32* %idx3, align 4
    292   store i32 %op4, i32* %idx4, align 4
    293 
    294   ret void
    295 }
    296  
    297 ; CHECK-LABEL: @addsub_some_nsw(
    298 ; CHECK: add nsw <4 x i32>
    299 ; CHECK: sub <4 x i32>
    300 define void @addsub_some_nsw(i32* %x) {
          ; Alternating add/sub case where only the adds are uniformly `nsw`:
          ; four adjacent i32 elements of %x are loaded, adjusted by 1, and
          ; stored back in place.
    301   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    302   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    303   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
    304   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
    305 
    306   %load1 = load i32, i32* %idx1, align 4
    307   %load2 = load i32, i32* %idx2, align 4
    308   %load3 = load i32, i32* %idx3, align 4
    309   %load4 = load i32, i32* %idx4, align 4
    310 
          ; Both adds (%op1, %op3) carry `nsw`, but the sub in %op4 does not;
          ; the CHECK lines for this function expect `add nsw` and a plain
          ; `sub` on the <4 x i32> operations.
    311   %op1 = add nsw i32 %load1, 1
    312   %op2 = sub nsw i32 %load2, 1
    313   %op3 = add nsw i32 %load3, 1
    314   %op4 = sub i32 %load4, 1
    315 
    316   store i32 %op1, i32* %idx1, align 4
    317   store i32 %op2, i32* %idx2, align 4
    318   store i32 %op3, i32* %idx3, align 4
    319   store i32 %op4, i32* %idx4, align 4
    320 
    321   ret void
    322 }
    323  
    324 ; CHECK-LABEL: @addsub_no_nsw(
    325 ; CHECK: add <4 x i32>
    326 ; CHECK: sub <4 x i32>
    327 define void @addsub_no_nsw(i32* %x) {
          ; Alternating add/sub case where neither opcode is uniformly `nsw`:
          ; four adjacent i32 elements of %x are loaded, adjusted by 1, and
          ; stored back in place.
    328   %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    329   %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    330   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
    331   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
    332 
    333   %load1 = load i32, i32* %idx1, align 4
    334   %load2 = load i32, i32* %idx2, align 4
    335   %load3 = load i32, i32* %idx3, align 4
    336   %load4 = load i32, i32* %idx4, align 4
    337 
          ; One add (%op1) and one sub (%op4) lack `nsw`, so the CHECK lines
          ; for this function expect an unflagged <4 x i32> add and an
          ; unflagged <4 x i32> sub.
    338   %op1 = add i32 %load1, 1
    339   %op2 = sub nsw i32 %load2, 1
    340   %op3 = add nsw i32 %load3, 1
    341   %op4 = sub i32 %load4, 1
    342 
    343   store i32 %op1, i32* %idx1, align 4
    344   store i32 %op2, i32* %idx2, align 4
    345   store i32 %op3, i32* %idx3, align 4
    346   store i32 %op4, i32* %idx4, align 4
    347 
    348   ret void
    349 }
    350  
    351