; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX -check-prefix=CHECK

; Test ADDSUB ISel patterns.
;
; Every function computes both A - B and A + B and then interleaves the two
; results with a shufflevector.  In a shuffle mask, an index below the vector
; length selects a lane of the first operand and an index at or above it
; selects a lane of the second operand, so a mask such as <0, 5, 2, 7> takes
; the even lanes from the first operand and the odd lanes from the second.
;
; The file is compiled twice: once for corei7 and once for corei7-avx.  For the
; 256-bit vector types the corei7 checks expect two 128-bit addsub
; instructions, while the corei7-avx checks expect exactly one.

; Functions below are obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }

; test1..test4: even lanes come from the fsub, odd lanes from the fadd.
; A single addsub instruction (per 128 bits on corei7) is expected.

define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}
; CHECK-LABEL: test1
; SSE: addsubps
; AVX: vaddsubps
; CHECK-NEXT: ret


define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}
; CHECK-LABEL: test2
; SSE: addsubps
; SSE-NEXT: addsubps
; AVX: vaddsubps
; AVX-NOT: vaddsubps
; CHECK: ret


define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}
; CHECK-LABEL: test3
; SSE: addsubpd
; SSE: addsubpd
; AVX: vaddsubpd
; AVX-NOT: vaddsubpd
; CHECK: ret


define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}
; CHECK-LABEL: test4
; SSE: addsubpd
; AVX: vaddsubpd
; CHECK-NEXT: ret


; test1b..test4b: same patterns as test1..test4, but the second operand is
; loaded from memory through the pointer argument %B.

define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
  %1 = load <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}
; CHECK-LABEL: test1b
; SSE: addsubps
; AVX: vaddsubps
; CHECK-NEXT: ret


define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
  %1 = load <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}
; CHECK-LABEL: test2b
; SSE: addsubps
; SSE-NEXT: addsubps
; AVX: vaddsubps
; AVX-NOT: vaddsubps
; CHECK: ret


define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
  %1 = load <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}
; CHECK-LABEL: test3b
; SSE: addsubpd
; SSE: addsubpd
; AVX: vaddsubpd
; AVX-NOT: vaddsubpd
; CHECK: ret


define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
  %1 = load <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}
; CHECK-LABEL: test4b
; SSE: addsubpd
; AVX: vaddsubpd
; CHECK-NEXT: ret

; Functions below are obtained from the following source:
; (the IR functions derived from it are named test5..test8 in this file)
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A + B;
;   float4 Y = A - B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A + B;
;   float8 Y = A - B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A + B;
;   double4 Y = A - B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A + B;
;   double2 Y = A - B;
;   return (double2){X[0], Y[1]};
; }

; test5..test8: the shuffle operands are swapped, so even lanes come from the
; fadd and odd lanes from the fsub.  The checks expect an xor immediately
; followed by an addsub (presumably the xor flips the sign of one input so
; that the addsub yields add/sub instead of sub/add -- confirm against the
; lowering code).

define <4 x float> @test5(<4 x float> %A, <4 x float> %B) {
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}
; CHECK-LABEL: test5
; SSE: xorps
; SSE-NEXT: addsubps
; AVX: vxorps
; AVX-NEXT: vaddsubps
; CHECK: ret


define <8 x float> @test6(<8 x float> %A, <8 x float> %B) {
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}
; CHECK-LABEL: test6
; SSE: xorps
; SSE-NEXT: addsubps
; SSE: xorps
; SSE-NEXT: addsubps
; AVX: vxorps
; AVX-NEXT: vaddsubps
; AVX-NOT: vxorps
; AVX-NOT: vaddsubps
; CHECK: ret


define <4 x double> @test7(<4 x double> %A, <4 x double> %B) {
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}
; CHECK-LABEL: test7
; SSE: xorpd
; SSE-NEXT: addsubpd
; SSE: xorpd
; SSE-NEXT: addsubpd
; AVX: vxorpd
; AVX-NEXT: vaddsubpd
; AVX-NOT: vxorpd
; AVX-NOT: vaddsubpd
; CHECK: ret


define <2 x double> @test8(<2 x double> %A, <2 x double> %B) #0 {
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}
; CHECK-LABEL: test8
; SSE: xorpd
; SSE-NEXT: addsubpd
; AVX: vxorpd
; AVX-NEXT: vaddsubpd
; CHECK: ret


; test5b..test8b: same patterns as test5..test8, but with the fadd operands
; commuted (%B, %A instead of %A, %B).

define <4 x float> @test5b(<4 x float> %A, <4 x float> %B) {
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %B, %A
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}
; CHECK-LABEL: test5b
; SSE: xorps
; SSE-NEXT: addsubps
; AVX: vxorps
; AVX-NEXT: vaddsubps
; CHECK: ret


define <8 x float> @test6b(<8 x float> %A, <8 x float> %B) {
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %B, %A
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}
; CHECK-LABEL: test6b
; SSE: xorps
; SSE-NEXT: addsubps
; SSE: xorps
; SSE-NEXT: addsubps
; AVX: vxorps
; AVX-NEXT: vaddsubps
; AVX-NOT: vxorps
; AVX-NOT: vaddsubps
; CHECK: ret


define <4 x double> @test7b(<4 x double> %A, <4 x double> %B) {
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %B, %A
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}
; CHECK-LABEL: test7b
; SSE: xorpd
; SSE-NEXT: addsubpd
; SSE: xorpd
; SSE-NEXT: addsubpd
; AVX: vxorpd
; AVX-NEXT: vaddsubpd
; AVX-NOT: vxorpd
; AVX-NOT: vaddsubpd
; CHECK: ret


define <2 x double> @test8b(<2 x double> %A, <2 x double> %B) #0 {
  %add = fadd <2 x double> %B, %A
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}
; CHECK-LABEL: test8b
; SSE: xorpd
; SSE-NEXT: addsubpd
; AVX: vxorpd
; AVX-NEXT: vaddsubpd
; CHECK: ret