; RUN: opt -S -instcombine < %s | FileCheck %s

; Verifies that instcombine simplifies the NEON widening-multiply intrinsics
; (ARM vmulls/vmullu and AArch64 smull/umull) when one or both operands are
; constant vectors. Every function is anchored with a FileCheck label
; directive so that its expectations can only match inside that function's
; own output and a failure is reported against the right function.

; Signed widening multiply by an all-zero vector folds to zeroinitializer.
; CHECK-LABEL: @mulByZero(
define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
  ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> zeroinitializer
}

; Signed widening multiply by an all-ones splat becomes a plain sext.
; CHECK-LABEL: @mulByOne(
define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %a
}

; Two constant operands are folded to a constant result (3 * 2 = 6).
; CHECK-LABEL: @constantMul(
define <4 x i32> @constantMul() nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
}

; Signed variant: i16 -1 is sign-extended, so -1 * 1 folds to i32 -1.
; CHECK-LABEL: @constantMulS(
define <4 x i32> @constantMulS() nounwind readnone ssp {
entry:
  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}

; Unsigned variant: i16 -1 is zero-extended, so the same inputs fold to 65535.
; CHECK-LABEL: @constantMulU(
define <4 x i32> @constantMulU() nounwind readnone ssp {
entry:
  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
}

; A multiply with a non-constant operand must survive as a call; only the
; redundant "add 0" around it is removed.
; CHECK-LABEL: @complex1(
define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
  %b = add <4 x i32> zeroinitializer, %a
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret <4 x i32> %a
}

; A fully constant multiply is folded and its result feeds the add directly.
; CHECK-LABEL: @complex2(
define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  %b = add <4 x i32> %x, %a
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: ret <4 x i32> %b
}

declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone

; ARM64 variants - <rdar://problem/12349617>
; The functions below repeat the cases above using the AArch64 smull/umull
; intrinsics and expect the same simplifications.

; CHECK-LABEL: @mulByZeroARM64(
define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
  ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> zeroinitializer
}

; CHECK-LABEL: @mulByOneARM64(
define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %a
}

; CHECK-LABEL: @constantMulARM64(
define <4 x i32> @constantMulARM64() nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
}

; CHECK-LABEL: @constantMulSARM64(
define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
entry:
  %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}

; CHECK-LABEL: @constantMulUARM64(
define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
entry:
  %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
}

; CHECK-LABEL: @complex1ARM64(
define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
  %b = add <4 x i32> zeroinitializer, %a
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret <4 x i32> %a
}

; CHECK-LABEL: @complex2ARM64(
define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  %b = add <4 x i32> %x, %a
  ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: ret <4 x i32> %b
}

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone

; Expected attribute groups in the output. NUW is the group number captured
; from the surviving intrinsic calls in the complex1 tests above.
; NOTE(review): this relies on FileCheck variables persisting across label
; boundaries (the default); confirm the suite does not enable variable scoping.
; CHECK: attributes #0 = { nounwind readnone ssp }
; CHECK: attributes #1 = { nounwind readnone }
; CHECK: attributes [[NUW]] = { nounwind }