; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX

; Test ADDSUB ISel patterns.

; Functions below are obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }

define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: test1:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
; SSE-LABEL: test2:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm2, %xmm0
; SSE-NEXT:    addsubps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: test3:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm2, %xmm0
; SSE-NEXT:    addsubpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
; SSE-LABEL: test4:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %vecinit2
}