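; RUN: opt -S -instcombine < %s | FileCheck %s
;
; InstCombine tests for the NEON widening-multiply intrinsics
; (llvm.arm.neon.vmulls/vmullu and llvm.aarch64.neon.smull/umull): multiply by
; zero, multiply by one, and folding of all-constant operands.
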
      3 define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
      4 entry:
      5   %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
      6   ret <4 x i32> %a
      7 ; CHECK: entry:
      8 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
      9 }
     10 
     11 define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
     12 entry:
     13   %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
     14   ret <4 x i32> %a
     15 ; CHECK: entry:
     16 ; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
     17 ; CHECK-NEXT: ret <4 x i32> %a
     18 }
     19 
     20 define <4 x i32> @constantMul() nounwind readnone ssp {
     21 entry:
     22   %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
     23   ret <4 x i32> %a
     24 ; CHECK: entry:
     25 ; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
     26 }
     27 
     28 define <4 x i32> @constantMulS() nounwind readnone ssp {
     29 entry:
     30   %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
     31   ret <4 x i32> %b
     32 ; CHECK: entry:
     33 ; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
     34 }
     35 
     36 define <4 x i32> @constantMulU() nounwind readnone ssp {
     37 entry:
     38   %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
     39   ret <4 x i32> %b
     40 ; CHECK: entry:
     41 ; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
     42 }
     43 
     44 define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
     45 entry:
     46   %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
     47   %b = add <4 x i32> zeroinitializer, %a
     48   ret <4 x i32> %b
     49 ; CHECK: entry:
     50 ; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
     51 ; CHECK-NEXT: ret <4 x i32> %a
     52 }
     53 
     54 define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
     55 entry:
     56   %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
     57   %b = add <4 x i32> %x, %a
     58   ret <4 x i32> %b  
     59 ; CHECK: entry:
     60 ; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
     61 ; CHECK-NEXT: ret <4 x i32> %b
     62 }
     63 
     64 declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
     65 declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
     66 
; ARM64 variants of the tests above, using the llvm.aarch64.neon.smull/umull
; intrinsics - <rdar://problem/12349617>

     69 define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
     70 entry:
     71   %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
     72   ret <4 x i32> %a
     73 ; CHECK: entry:
     74 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
     75 }
     76 
     77 define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
     78 entry:
     79   %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
     80   ret <4 x i32> %a
     81 ; CHECK: entry:
     82 ; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
     83 ; CHECK-NEXT: ret <4 x i32> %a
     84 }
     85 
     86 define <4 x i32> @constantMulARM64() nounwind readnone ssp {
     87 entry:
     88   %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
     89   ret <4 x i32> %a
     90 ; CHECK: entry:
     91 ; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
     92 }
     93 
     94 define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
     95 entry:
     96   %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
     97   ret <4 x i32> %b
     98 ; CHECK: entry:
     99 ; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
    100 }
    101 
    102 define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
    103 entry:
    104   %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
    105   ret <4 x i32> %b
    106 ; CHECK: entry:
    107 ; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
    108 }
    109 
    110 define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
    111 entry:
    112   %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
    113   %b = add <4 x i32> zeroinitializer, %a
    114   ret <4 x i32> %b
    115 ; CHECK: entry:
    116 ; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
    117 ; CHECK-NEXT: ret <4 x i32> %a
    118 }
    119 
    120 define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
    121 entry:
    122   %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
    123   %b = add <4 x i32> %x, %a
    124   ret <4 x i32> %b
    125 ; CHECK: entry:
    126 ; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
    127 ; CHECK-NEXT: ret <4 x i32> %b
    128 }
    129 
    130 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
    131 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
    132 
; CHECK: attributes #0 = { nounwind readnone ssp }
; CHECK: attributes #1 = { nounwind readnone }
; CHECK: attributes [[NUW]] = { nounwind }