; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE

; Checks x86-64 code generation (core2, SSE2) for arithmetic on 64-bit vectors
; that are bitcast from, and back to, a 64-bit scalar (double or i64).
; Every function is compiled twice by the RUN lines above: once with the
; default vector legalization (checked with the default prefix) and once with
; -x86-experimental-vector-widening-legalization (checked with the CHECK-WIDE
; prefix).


; @test1 - <2 x i32> add of a constant vector; double in, double out.
define double @test1(double %A) {
  %1 = bitcast double %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test1 into a
; single paddd instruction. At the moment the default legalization is expected
; to produce the sequence pshufd+paddd+pshufd (see the checks below). This is
; fixed with the widening legalization, where a lone paddd is expected.
;
; CHECK-LABEL: test1
; CHECK-NOT: movsd
; CHECK: pshufd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test1
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE: paddd
; CHECK-WIDE-NEXT: ret


; @test2 - <2 x i32> add of two bitcast double arguments; double out.
; Both configurations expect a single paddd directly followed by ret, with no
; movsd before it.
define double @test2(double %A, double %B) {
  %1 = bitcast double %A to <2 x i32>
  %2 = bitcast double %B to <2 x i32>
  %add = add <2 x i32> %1, %2
  %3 = bitcast <2 x i32> %add to double
  ret double %3
}
; CHECK-LABEL: test2
; CHECK-NOT: movsd
; CHECK: paddd
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test2
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE: paddd
; CHECK-WIDE-NEXT: ret


; @test3 - <2 x float> fadd of a constant vector; i64 in, i64 out.
; Both configurations expect an addps with no pshufd before or after it.
define i64 @test3(i64 %A) {
  %1 = bitcast i64 %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to i64
  ret i64 %2
}
; CHECK-LABEL: test3
; CHECK-NOT: pshufd
; CHECK: addps
; CHECK-NOT: pshufd
; CHECK: ret
;
; CHECK-WIDE-LABEL: test3
; CHECK-WIDE-NOT: pshufd
; CHECK-WIDE: addps
; CHECK-WIDE-NOT: pshufd
; CHECK-WIDE: ret


; @test4 - <2 x i32> add of a constant vector; i64 in, i64 out.
define i64 @test4(i64 %A) {
  %1 = bitcast i64 %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to i64
  ret i64 %2
}
; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
; Ideally, we should fold that sequence into a single paddd. This is fixed with
; the widening legalization, where the checks expect a movd from the incoming
; integer register (rdi or rcx), a paddd, and a movd into rax.
;
; CHECK-LABEL: test4
; CHECK: pshufd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK: ret
;
; CHECK-WIDE-LABEL: test4
; CHECK-WIDE: movd %{{rdi|rcx}},
; CHECK-WIDE-NEXT: paddd
; CHECK-WIDE-NEXT: movd {{.*}}, %rax
; CHECK-WIDE: ret


; @test5 - <2 x float> fadd of a constant vector; double in, double out.
; Both configurations expect an addps directly followed by ret.
define double @test5(double %A) {
  %1 = bitcast double %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to double
  ret double %2
}
; CHECK-LABEL: test5
; CHECK: addps
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test5
; CHECK-WIDE: addps
; CHECK-WIDE-NEXT: ret


; @test6 - <4 x i16> add of a constant vector; double in, double out.
define double @test6(double %A) {
  %1 = bitcast double %A to <4 x i16>
  %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
  %2 = bitcast <4 x i16> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test6 into a
; single paddw instruction. At the moment the default legalization is expected
; to produce the sequence punpcklwd+paddw+pshufb (see the checks below). This
; is fixed with the widening legalization.
;
; CHECK-LABEL: test6
; CHECK-NOT: movsd
; CHECK: punpcklwd
; CHECK-NEXT: paddw
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test6
; CHECK-WIDE-NOT: mov
; CHECK-WIDE-NOT: punpcklwd
; CHECK-WIDE: paddw
; CHECK-WIDE-NEXT: ret


; @test7 - <4 x i16> add of two bitcast double arguments; double out.
; Both configurations expect a single paddw directly followed by ret, with no
; movsd or punpcklwd before it.
define double @test7(double %A, double %B) {
  %1 = bitcast double %A to <4 x i16>
  %2 = bitcast double %B to <4 x i16>
  %add = add <4 x i16> %1, %2
  %3 = bitcast <4 x i16> %add to double
  ret double %3
}
; CHECK-LABEL: test7
; CHECK-NOT: movsd
; CHECK-NOT: punpcklwd
; CHECK: paddw
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test7
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklwd
; CHECK-WIDE: paddw
; CHECK-WIDE-NEXT: ret


; @test8 - <8 x i8> add of a constant vector; double in, double out.
define double @test8(double %A) {
  %1 = bitcast double %A to <8 x i8>
  %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
  %2 = bitcast <8 x i8> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test8 into a
; single paddb instruction. At the moment the default legalization is expected
; to produce the sequence punpcklbw+paddb+pshufb (see the checks below). This
; is fixed with the widening legalization.
;
; CHECK-LABEL: test8
; CHECK-NOT: movsd
; CHECK: punpcklbw
; CHECK-NEXT: paddb
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test8
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklbw
; CHECK-WIDE: paddb
; CHECK-WIDE-NEXT: ret


; @test9 - <8 x i8> add of two bitcast double arguments; double out.
; Both configurations expect a single paddb directly followed by ret, with no
; movsd or punpcklbw before it.
define double @test9(double %A, double %B) {
  %1 = bitcast double %A to <8 x i8>
  %2 = bitcast double %B to <8 x i8>
  %add = add <8 x i8> %1, %2
  %3 = bitcast <8 x i8> %add to double
  ret double %3
}
; CHECK-LABEL: test9
; CHECK-NOT: movsd
; CHECK-NOT: punpcklbw
; CHECK: paddb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test9
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklbw
; CHECK-WIDE: paddb
; CHECK-WIDE-NEXT: ret
