; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s

; Codegen tests for an 'or' whose two operands are either shuffles against a
; zero vector or 'and's with constant lane masks.
;
; Notation used in the per-test comments: a[i] / b[i] is lane i of the first /
; second argument, and 0 is a lane taken from zeroinitializer (or cleared by
; the constant mask).
;
; Every label pattern below ends with ':' so that it matches only the label
; line of its own function (for example, the pattern for test1 cannot match
; inside test10, and it cannot match a .globl or .size line).


; Verify that each of the following test cases is folded into a single
; instruction which performs a blend operation.

; <a[0], 0> | <0, b[1]>  ==>  <a[0], b[1]>
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}
; CHECK-LABEL: test1:
; CHECK-NOT: xorps
; CHECK: movsd
; CHECK-NOT: orps
; CHECK: ret


; <0, 0, a[2], a[3]> | <b[0], b[1], 0, 0>  ==>  <b[0], b[1], a[2], a[3]>
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test2:
; CHECK-NOT: xorps
; CHECK: movsd
; CHECK: ret


; <0, a[1]> | <b[0], 0>  ==>  <b[0], a[1]>
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}
; CHECK-LABEL: test3:
; CHECK-NOT: xorps
; CHECK: movsd
; CHECK-NEXT: ret


; <a[0], 0, 0, 0> | <0, b[1], b[2], b[3]>  ==>  <a[0], b[1], b[2], b[3]>
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test4:
; CHECK-NOT: xorps
; CHECK: movss
; CHECK-NOT: orps
; CHECK: ret


; <0, a[1], a[2], a[3]> | <b[0], 0, 0, 0>  ==>  <b[0], a[1], a[2], a[3]>
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test5:
; CHECK-NOT: xorps
; CHECK: movss
; CHECK-NEXT: ret


; <a[0], a[1], 0, 0> | <0, 0, b[2], b[3]>  ==>  <a[0], a[1], b[2], b[3]>
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test6:
; CHECK-NOT: xorps
; CHECK: blendps $12
; CHECK-NEXT: ret


; Same lane selection as test6, expressed with constant 'and' masks:
; <a[0], a[1], 0, 0> | <0, 0, b[2], b[3]>  ==>  <a[0], a[1], b[2], b[3]>
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
  %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}
; CHECK-LABEL: test7:
; CHECK-NOT: xorps
; CHECK: blendps $12
; CHECK-NEXT: ret


; Same lane selection as test1, expressed with constant 'and' masks:
; <a[0], 0> | <0, b[1]>  ==>  <a[0], b[1]>
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
  %and1 = and <2 x i64> %a, <i64 -1, i64 0>
  %and2 = and <2 x i64> %b, <i64 0, i64 -1>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}
; CHECK-LABEL: test8:
; CHECK-NOT: xorps
; CHECK: movsd
; CHECK-NOT: orps
; CHECK: ret


; Same lane selection as test2, expressed with constant 'and' masks:
; <0, 0, a[2], a[3]> | <b[0], b[1], 0, 0>  ==>  <b[0], b[1], a[2], a[3]>
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
  %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}
; CHECK-LABEL: test9:
; CHECK-NOT: xorps
; CHECK: movsd
; CHECK: ret


; Same lane selection as test3, expressed with constant 'and' masks:
; <0, a[1]> | <b[0], 0>  ==>  <b[0], a[1]>
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
  %and1 = and <2 x i64> %a, <i64 0, i64 -1>
  %and2 = and <2 x i64> %b, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}
; CHECK-LABEL: test10:
; CHECK-NOT: xorps
; CHECK: movsd
; CHECK-NEXT: ret


; Same lane selection as test4, expressed with constant 'and' masks:
; <a[0], 0, 0, 0> | <0, b[1], b[2], b[3]>  ==>  <a[0], b[1], b[2], b[3]>
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}
; CHECK-LABEL: test11:
; CHECK-NOT: xorps
; CHECK: movss
; CHECK-NOT: orps
; CHECK: ret


; Same lane selection as test5, expressed with constant 'and' masks:
; <0, a[1], a[2], a[3]> | <b[0], 0, 0, 0>  ==>  <b[0], a[1], a[2], a[3]>
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
  %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}
; CHECK-LABEL: test12:
; CHECK-NOT: xorps
; CHECK: movss
; CHECK-NEXT: ret


; Verify that the following test cases are folded into single shuffles.

; <a[1], a[1], 0, 0> | <0, 0, b[2], b[3]>  ==>  <a[1], a[1], b[2], b[3]>
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test13:
; CHECK-NOT: xorps
; CHECK: shufps
; CHECK-NEXT: ret


; <a[0], 0> | <0, b[0]>  ==>  <a[0], b[0]>
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}
; CHECK-LABEL: test14:
; CHECK-NOT: pslldq
; CHECK-NOT: por
; CHECK: punpcklqdq
; CHECK-NEXT: ret


; <0, 0, a[2], a[1]> | <b[2], b[1], 0, 0>  ==>  <b[2], b[1], a[2], a[1]>
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test15:
; CHECK-NOT: xorps
; CHECK: shufps
; CHECK-NOT: shufps
; CHECK-NOT: orps
; CHECK: ret


; <0, a[0]> | <b[0], 0>  ==>  <b[0], a[0]>
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}
; CHECK-LABEL: test16:
; CHECK-NOT: pslldq
; CHECK-NOT: por
; CHECK: punpcklqdq
; CHECK: ret


; Verify that the dag-combiner does not fold an OR of two shuffles into a single
; shuffle instruction when the shuffle indexes are not compatible.

; <0, a[0], 0, a[2]> | <b[0], b[1], 0, 0>: lane 1 is a[0] | b[1], so it takes
; bits from both arguments; lane 2 is zero in both operands.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test17:
; CHECK: por
; CHECK-NEXT: ret


; <0, a[0], 0, 0> | <b[0], 0, 0, 0>: lanes 2 and 3 are zero in both operands.
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test18:
; CHECK: orps
; CHECK: ret


; <0, a[0], 0, a[3]> | <b[0], 0, b[2], b[2]>: lane 3 is a[3] | b[2], so it
; takes bits from both arguments.
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
; CHECK-LABEL: test19:
; CHECK: por
; CHECK-NEXT: ret


; <a[0], 0> | <b[0], 0>: lane 0 is a[0] | b[0]; lane 1 is zero.
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}
; CHECK-LABEL: test20:
; CHECK-NOT: xorps
; CHECK: orps
; CHECK-NEXT: movq
; CHECK-NEXT: ret


; <0, a[0]> | <0, b[0]>: lane 0 is zero; lane 1 is a[0] | b[0].
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}
; CHECK-LABEL: test21:
; CHECK: por
; CHECK-NEXT: pslldq
; CHECK-NEXT: ret

