; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE

; FIXME: Ideally we should be able to fold the entire body of @test1 into a
; single paddd instruction. At the moment we produce the sequence
; shufps+paddd+pshufd. This is fixed with the widening legalization.

define double @test1(double %A) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test1:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast double %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to double
  ret double %2
}

define double @test2(double %A, double %B) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    paddd %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test2:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    paddd %xmm1, %xmm0
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast double %A to <2 x i32>
  %2 = bitcast double %B to <2 x i32>
  %add = add <2 x i32> %1, %2
  %3 = bitcast <2 x i32> %add to double
  ret double %3
}

define i64 @test3(i64 %A) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %xmm0
; CHECK-NEXT:    addps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test3:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    movq %rdi, %xmm0
; CHECK-WIDE-NEXT:    addps {{.*}}(%rip), %xmm0
; CHECK-WIDE-NEXT:    movq %xmm0, %rax
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast i64 %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to i64
  ret i64 %2
}

; FIXME: Ideally we should be able to fold the entire body of @test4 into a
; single paddd instruction. This is fixed with the widening legalization.

define i64 @test4(i64 %A) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %xmm0
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test4:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    movq %rdi, %xmm0
; CHECK-WIDE-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-WIDE-NEXT:    movq %xmm0, %rax
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast i64 %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to i64
  ret i64 %2
}

define double @test5(double %A) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test5:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    addps {{.*}}(%rip), %xmm0
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast double %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to double
  ret double %2
}

; FIXME: Ideally we should be able to fold the entire body of @test6 into a
; single paddw instruction. This is fixed with the widening legalization.

define double @test6(double %A) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT:    paddw {{.*}}(%rip), %xmm0
; CHECK-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test6:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    paddw {{.*}}(%rip), %xmm0
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast double %A to <4 x i16>
  %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
  %2 = bitcast <4 x i16> %add to double
  ret double %2
}

define double @test7(double %A, double %B) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    paddw %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test7:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    paddw %xmm1, %xmm0
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast double %A to <4 x i16>
  %2 = bitcast double %B to <4 x i16>
  %add = add <4 x i16> %1, %2
  %3 = bitcast <4 x i16> %add to double
  ret double %3
}

; FIXME: Ideally we should be able to fold the entire body of @test8 into a
; single paddb instruction. At the moment we produce the sequence
; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.

define double @test8(double %A) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT:    paddb {{.*}}(%rip), %xmm0
; CHECK-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test8:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    paddb {{.*}}(%rip), %xmm0
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast double %A to <8 x i8>
  %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
  %2 = bitcast <8 x i8> %add to double
  ret double %2
}

define double @test9(double %A, double %B) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    paddb %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; CHECK-WIDE-LABEL: test9:
; CHECK-WIDE:       # %bb.0:
; CHECK-WIDE-NEXT:    paddb %xmm1, %xmm0
; CHECK-WIDE-NEXT:    retq
  %1 = bitcast double %A to <8 x i8>
  %2 = bitcast double %B to <8 x i8>
  %add = add <8 x i8> %1, %2
  %3 = bitcast <8 x i8> %add to double
  ret double %3
}