; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512

; Test ADDSUB ISel patterns.

; Functions below are obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }

define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: test1:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
; SSE-LABEL: test2:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm2, %xmm0
; SSE-NEXT:    addsubps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: test3:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm2, %xmm0
; SSE-NEXT:    addsubpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
; SSE-LABEL: test4:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
; SSE-LABEL: test5:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm4, %xmm0
; SSE-NEXT:    addsubps %xmm5, %xmm1
; SSE-NEXT:    addsubps %xmm6, %xmm2
; SSE-NEXT:    addsubps %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test5:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test5:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; AVX512-NEXT:    kmovw %eax, %k1
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1}
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %add = fadd <16 x float> %A, %B
  %sub = fsub <16 x float> %A, %B
  %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %vecinit2
}

define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
; SSE-LABEL: test6:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm4, %xmm0
; SSE-NEXT:    addsubpd %xmm5, %xmm1
; SSE-NEXT:    addsubpd %xmm6, %xmm2
; SSE-NEXT:    addsubpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test6:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test6:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
; AVX512-NEXT:    retq
  %add = fadd <8 x double> %A, %B
  %sub = fsub <8 x double> %A, %B
  %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %vecinit2
}

define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %vecinit2
}