; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux -mattr=mmx < %s | FileCheck %s

; MMX packed sub opcodes were wrongly marked as commutative.
; This test checks that the operands of packed sub instructions are
; never interchanged by the "Two-Address instruction pass".
;
; Every function below has the same shape:
;   1. call @getFirstParam and @getSecondParam and keep the i64 member of
;      each returned aggregate,
;   2. wrap each i64 in a <1 x i64>, reinterpret it with the element type of
;      the intrinsic under test, and then as x86_mmx,
;   3. call the packed-subtract intrinsic with the first value as operand 0
;      and the second value as operand 1,
;   4. convert the result back to i64 and return it.
; The expected assembly moves the first value (saved in %rbx across the second
; call) into %mm0 and the second value (%rax) into %mm1, and subtracts with
; %mm0 as the last operand, which is the destination in the AT&T syntax that
; llc prints. Swapped operands would therefore fail the assertions.

; External producers of the two operands. Only element 0 (the i64) of each
; returned aggregate is read by the tests.
declare { i64, double } @getFirstParam()
declare { i64, double } @getSecondParam()

; @llvm.x86.mmx.psub.b on <8 x i8> data; expected to select psubb.
define i64 @test_psubb() {
; CHECK-LABEL: test_psubb:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubb %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Fetch both operands; %0 is the first value, %1 the second.
  %call = tail call { i64, double } @getFirstParam()
  %0 = extractvalue { i64, double } %call, 0
  %call2 = tail call { i64, double } @getSecondParam()
  %1 = extractvalue { i64, double } %call2, 0
  ; Wrap each i64 into a single-element vector.
  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
  ; Reinterpret as <8 x i8>, then as x86_mmx (%3 = first, %5 = second).
  %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
  %3 = bitcast <8 x i8> %2 to x86_mmx
  %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
  %5 = bitcast <8 x i8> %4 to x86_mmx
  ; Operation under test: first value is operand 0, second value is operand 1.
  %6 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %3, x86_mmx %5) nounwind
  ; Convert the x86_mmx result back to a plain i64 for the return.
  %7 = bitcast x86_mmx %6 to <8 x i8>
  %8 = bitcast <8 x i8> %7 to <1 x i64>
  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
  ret i64 %retval.0.extract.i15
}

; @llvm.x86.mmx.psub.w on <4 x i16> data; expected to select psubw.
define i64 @test_psubw() {
; CHECK-LABEL: test_psubw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubw %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Fetch both operands; %0 is the first value, %1 the second.
  %call = tail call { i64, double } @getFirstParam()
  %0 = extractvalue { i64, double } %call, 0
  %call2 = tail call { i64, double } @getSecondParam()
  %1 = extractvalue { i64, double } %call2, 0
  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
  ; Reinterpret as <4 x i16>, then as x86_mmx (%3 = first, %5 = second).
  %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
  %3 = bitcast <4 x i16> %2 to x86_mmx
  %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
  %5 = bitcast <4 x i16> %4 to x86_mmx
  ; Operation under test: first value is operand 0, second value is operand 1.
  %6 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %3, x86_mmx %5) nounwind
  %7 = bitcast x86_mmx %6 to <4 x i16>
  %8 = bitcast <4 x i16> %7 to <1 x i64>
  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
  ret i64 %retval.0.extract.i15
}

; @llvm.x86.mmx.psub.d on <2 x i32> data; expected to select psubd.
define i64 @test_psubd() {
; CHECK-LABEL: test_psubd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubd %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Fetch both operands; %0 is the first value, %1 the second.
  %call = tail call { i64, double } @getFirstParam()
  %0 = extractvalue { i64, double } %call, 0
  %call2 = tail call { i64, double } @getSecondParam()
  %1 = extractvalue { i64, double } %call2, 0
  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
  ; Reinterpret as <2 x i32>, then as x86_mmx (%3 = first, %5 = second).
  %2 = bitcast <1 x i64> %__m1.0.insert.i to <2 x i32>
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = bitcast <1 x i64> %__m2.0.insert.i to <2 x i32>
  %5 = bitcast <2 x i32> %4 to x86_mmx
  ; Operation under test: first value is operand 0, second value is operand 1.
  %6 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %3, x86_mmx %5) nounwind
  %7 = bitcast x86_mmx %6 to <2 x i32>
  %8 = bitcast <2 x i32> %7 to <1 x i64>
  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
  ret i64 %retval.0.extract.i15
}

; @llvm.x86.mmx.psubs.b on <8 x i8> data; expected to select psubsb.
define i64 @test_psubsb() {
; CHECK-LABEL: test_psubsb:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubsb %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Fetch both operands; %0 is the first value, %1 the second.
  %call = tail call { i64, double } @getFirstParam()
  %0 = extractvalue { i64, double } %call, 0
  %call2 = tail call { i64, double } @getSecondParam()
  %1 = extractvalue { i64, double } %call2, 0
  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
  ; Reinterpret as <8 x i8>, then as x86_mmx (%3 = first, %5 = second).
  %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
  %3 = bitcast <8 x i8> %2 to x86_mmx
  %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
  %5 = bitcast <8 x i8> %4 to x86_mmx
  ; Operation under test: first value is operand 0, second value is operand 1.
  %6 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %3, x86_mmx %5) nounwind
  %7 = bitcast x86_mmx %6 to <8 x i8>
  %8 = bitcast <8 x i8> %7 to <1 x i64>
  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
  ret i64 %retval.0.extract.i15
}

; @llvm.x86.mmx.psubs.w on <4 x i16> data; expected to select psubsw.
define i64 @test_psubswv() {
; CHECK-LABEL: test_psubswv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubsw %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Fetch both operands; %0 is the first value, %1 the second.
  %call = tail call { i64, double } @getFirstParam()
  %0 = extractvalue { i64, double } %call, 0
  %call2 = tail call { i64, double } @getSecondParam()
  %1 = extractvalue { i64, double } %call2, 0
  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
  ; Reinterpret as <4 x i16>, then as x86_mmx (%3 = first, %5 = second).
  %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
  %3 = bitcast <4 x i16> %2 to x86_mmx
  %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
  %5 = bitcast <4 x i16> %4 to x86_mmx
  ; Operation under test: first value is operand 0, second value is operand 1.
  %6 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %3, x86_mmx %5) nounwind
  %7 = bitcast x86_mmx %6 to <4 x i16>
  %8 = bitcast <4 x i16> %7 to <1 x i64>
  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
  ret i64 %retval.0.extract.i15
}

; @llvm.x86.mmx.psubus.b on <8 x i8> data; expected to select psubusb.
define i64 @test_psubusbv() {
; CHECK-LABEL: test_psubusbv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubusb %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Fetch both operands; %0 is the first value, %1 the second.
  %call = tail call { i64, double } @getFirstParam()
  %0 = extractvalue { i64, double } %call, 0
  %call2 = tail call { i64, double } @getSecondParam()
  %1 = extractvalue { i64, double } %call2, 0
  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
  ; Reinterpret as <8 x i8>, then as x86_mmx (%3 = first, %5 = second).
  %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
  %3 = bitcast <8 x i8> %2 to x86_mmx
  %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
  %5 = bitcast <8 x i8> %4 to x86_mmx
  ; Operation under test: first value is operand 0, second value is operand 1.
  %6 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %3, x86_mmx %5) nounwind
  %7 = bitcast x86_mmx %6 to <8 x i8>
  %8 = bitcast <8 x i8> %7 to <1 x i64>
  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
  ret i64 %retval.0.extract.i15
}

; @llvm.x86.mmx.psubus.w on <4 x i16> data; expected to select psubusw.
define i64 @test_psubuswv() {
; CHECK-LABEL: test_psubuswv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubusw %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Fetch both operands; %0 is the first value, %1 the second.
  %call = tail call { i64, double } @getFirstParam()
  %0 = extractvalue { i64, double } %call, 0
  %call2 = tail call { i64, double } @getSecondParam()
  %1 = extractvalue { i64, double } %call2, 0
  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
  ; Reinterpret as <4 x i16>, then as x86_mmx (%3 = first, %5 = second).
  %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
  %3 = bitcast <4 x i16> %2 to x86_mmx
  %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
  %5 = bitcast <4 x i16> %4 to x86_mmx
  ; Operation under test: first value is operand 0, second value is operand 1.
  %6 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %3, x86_mmx %5) nounwind
  %7 = bitcast x86_mmx %6 to <4 x i16>
  %8 = bitcast <4 x i16> %7 to <1 x i64>
  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
  ret i64 %retval.0.extract.i15
}

; Intrinsic called by @test_psubuswv.
declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

; MMX packed-subtract intrinsics called by the test functions in this file,
; grouped by family and ordered by element width. Each takes two x86_mmx
; operands and returns an x86_mmx result.

; psub family.
declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

; psubs family.
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

; psubus family.
declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone