; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X64

; Verify that the backend correctly folds a sign/zero extend of a vector where
; elements are all constant values or UNDEFs.
; The backend should be able to optimize all the test functions below into
; simple loads from constant pool of the result. That is because the resulting
; vector should be statically known at compile time.

; sext <4 x i8> -> <4 x i16>: all four lanes constant (-1 and -3 sign-extend
; to the all-ones-based patterns shown in the checks).
define <4 x i16> @test_sext_4i8_4i16() {
; X32-LABEL: test_sext_4i8_4i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

; As above but lanes 0 and 2 are undef; folded lanes must stay undef ("u").
define <4 x i16> @test_sext_4i8_4i16_undef() {
; X32-LABEL: test_sext_4i8_4i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

; sext <4 x i8> -> <4 x i32>, all-constant lanes.
define <4 x i32> @test_sext_4i8_4i32() {
; X32-LABEL: test_sext_4i8_4i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i32>
  ret <4 x i32> %5
}

; sext <4 x i8> -> <4 x i32> with undef lanes 0 and 2.
define <4 x i32> @test_sext_4i8_4i32_undef() {
; X32-LABEL: test_sext_4i8_4i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i32>
  ret <4 x i32> %5
}

; sext <4 x i8> -> <4 x i64>. The 32-bit target prints each 64-bit lane as a
; pair of 32-bit values, hence the eight-element constant in the X32 check.
define <4 x i64> @test_sext_4i8_4i64() {
; X32-LABEL: test_sext_4i8_4i64:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,2,0,4294967293,4294967295]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,2,18446744073709551613]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

; sext <4 x i8> -> <4 x i64> with undef lanes 0 and 2.
define <4 x i64> @test_sext_4i8_4i64_undef() {
; X32-LABEL: test_sext_4i8_4i64_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,4294967295,4294967295,u,u,4294967293,4294967295>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i64_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,18446744073709551615,u,18446744073709551613>
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = sext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

; NOTE(review): the sext operand below is %4, so the inserts into lanes 4-7
; (%5-%8) are dead and those lanes fold to undef ("u" in the checks). The
; autogenerated checks agree with that, so it may be intentional (testing a
; partially-constant vector), but the zext twin of this test extends %8 —
; confirm, or extend from %8 and regenerate the checks.
define <8 x i16> @test_sext_8i8_8i16() {
; X32-LABEL: test_sext_8i8_8i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = sext <8 x i8> %4 to <8 x i16>
  ret <8 x i16> %9
}

; NOTE(review): same as above — sext of %4 leaves inserts %5-%8 dead; the
; checks match, but verify against the zext counterpart which uses %8.
define <8 x i32> @test_sext_8i8_8i32() {
; X32-LABEL: test_sext_8i8_8i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,4294967295,2,4294967293,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,4294967295,2,4294967293,u,u,u,u>
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = sext <8 x i8> %4 to <8 x i32>
  ret <8 x i32> %9
}

; NOTE(review): extends %4, so inserts %5-%8 are dead (see note above the
; non-undef 8i8 variant).
define <8 x i16> @test_sext_8i8_8i16_undef() {
; X32-LABEL: test_sext_8i8_8i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 undef, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 undef, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = sext <8 x i8> %4 to <8 x i16>
  ret <8 x i16> %9
}

; NOTE(review): extends %4, so inserts %5-%8 are dead (see note above the
; non-undef 8i8 variant).
define <8 x i32> @test_sext_8i8_8i32_undef() {
; X32-LABEL: test_sext_8i8_8i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,u,u,u,u>
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 undef, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 undef, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = sext <8 x i8> %4 to <8 x i32>
  ret <8 x i32> %9
}

; zext <4 x i8> -> <4 x i16>: negative i8 constants fold to their unsigned
; byte values (255, 253).
define <4 x i16> @test_zext_4i8_4i16() {
; X32-LABEL: test_zext_4i8_4i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

; zext <4 x i8> -> <4 x i32>, all-constant lanes.
define <4 x i32> @test_zext_4i8_4i32() {
; X32-LABEL: test_zext_4i8_4i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i32>
  ret <4 x i32> %5
}

; zext <4 x i8> -> <4 x i64>. X32 prints each 64-bit lane as two 32-bit
; halves (low,high), hence the interleaved zeros.
define <4 x i64> @test_zext_4i8_4i64() {
; X32-LABEL: test_zext_4i8_4i64:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,253,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253]
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

; zext <4 x i8> -> <4 x i16> with undef lanes 0 and 2.
define <4 x i16> @test_zext_4i8_4i16_undef() {
; X32-LABEL: test_zext_4i8_4i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 undef, i32 2
  %4 = insertelement <4 x i8> %3, i8 -3, i32 3
  %5 = zext <4 x i8> %4 to <4 x i16>
  ret <4 x i16> %5
}

; zext <4 x i8> -> <4 x i32> with undef lanes 1 and 3.
define <4 x i32> @test_zext_4i8_4i32_undef() {
; X32-LABEL: test_zext_4i8_4i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 0, i32 0
  %2 = insertelement <4 x i8> %1, i8 undef, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 undef, i32 3
  %5 = zext <4 x i8> %4 to <4 x i64>
  ret <4 x i32> %5
}

; zext <4 x i8> -> <4 x i64> with undef lanes 0 and 3.
define <4 x i64> @test_zext_4i8_4i64_undef() {
; X32-LABEL: test_zext_4i8_4i64_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,255,0,2,0,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i64_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,255,2,u>
; X64-NEXT:    retq
  %1 = insertelement <4 x i8> undef, i8 undef, i32 0
  %2 = insertelement <4 x i8> %1, i8 -1, i32 1
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = insertelement <4 x i8> %3, i8 undef, i32 3
  %5 = zext <4 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

; zext <8 x i8> -> <8 x i16>, all eight lanes constant (extends %8, unlike
; the sext 8i8 tests above which extend %4).
define <8 x i16> @test_zext_8i8_8i16() {
; X32-LABEL: test_zext_8i8_8i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = zext <8 x i8> %8 to <8 x i16>
  ret <8 x i16> %9
}

; zext <8 x i8> -> <8 x i32>, all eight lanes constant.
define <8 x i32> @test_zext_8i8_8i32() {
; X32-LABEL: test_zext_8i8_8i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = zext <8 x i8> %8 to <8 x i32>
  ret <8 x i32> %9
}

; zext <8 x i8> -> <8 x i16> with the even lanes undef.
define <8 x i16> @test_zext_8i8_8i16_undef() {
; X32-LABEL: test_zext_8i8_8i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 undef, i32 0
  %2 = insertelement <8 x i8> %1, i8 -1, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 -5, i32 5
  %7 = insertelement <8 x i8> %6, i8 undef, i32 6
  %8 = insertelement <8 x i8> %7, i8 -7, i32 7
  %9 = zext <8 x i8> %8 to <8 x i16>
  ret <8 x i16> %9
}

; zext <8 x i8> -> <8 x i32> with undef lanes 1, 5 and 7 (lane 3 is -3,
; hence the 253 in the checks).
define <8 x i32> @test_zext_8i8_8i32_undef() {
; X32-LABEL: test_zext_8i8_8i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 2, i32 2
  %4 = insertelement <8 x i8> %3, i8 -3, i32 3
  %5 = insertelement <8 x i8> %4, i8 4, i32 4
  %6 = insertelement <8 x i8> %5, i8 undef, i32 5
  %7 = insertelement <8 x i8> %6, i8 6, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = zext <8 x i8> %8 to <8 x i32>
  ret <8 x i32> %9
}