; X86 CodeGen test: constant folding of sext/zext of all-constant (or undef) vectors.
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X64
      4 
      5 ; Verify that the backend correctly folds a sign/zero extend of a vector where
      6 ; elements are all constant values or UNDEFs.
      7 ; The backend should be able to optimize all the test functions below into
      8 ; simple loads from constant pool of the result. That is because the resulting
      9 ; vector should be known at compile time.
     10 
     11 define <4 x i16> @test_sext_4i8_4i16() {
     12 ; X32-LABEL: test_sext_4i8_4i16:
     13 ; X32:       # %bb.0:
     14 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
     15 ; X32-NEXT:    retl
     16 ;
     17 ; X64-LABEL: test_sext_4i8_4i16:
     18 ; X64:       # %bb.0:
     19 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
     20 ; X64-NEXT:    retq
     21   %1 = insertelement <4 x i8> undef, i8 0, i32 0
     22   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
     23   %3 = insertelement <4 x i8> %2, i8 2, i32 2
     24   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
     25   %5 = sext <4 x i8> %4 to <4 x i16>
     26   ret <4 x i16> %5
     27 }
     28 
     29 define <4 x i16> @test_sext_4i8_4i16_undef() {
     30 ; X32-LABEL: test_sext_4i8_4i16_undef:
     31 ; X32:       # %bb.0:
     32 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
     33 ; X32-NEXT:    retl
     34 ;
     35 ; X64-LABEL: test_sext_4i8_4i16_undef:
     36 ; X64:       # %bb.0:
     37 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
     38 ; X64-NEXT:    retq
     39   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
     40   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
     41   %3 = insertelement <4 x i8> %2, i8 undef, i32 2
     42   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
     43   %5 = sext <4 x i8> %4 to <4 x i16>
     44   ret <4 x i16> %5
     45 }
     46 
     47 define <4 x i32> @test_sext_4i8_4i32() {
     48 ; X32-LABEL: test_sext_4i8_4i32:
     49 ; X32:       # %bb.0:
     50 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
     51 ; X32-NEXT:    retl
     52 ;
     53 ; X64-LABEL: test_sext_4i8_4i32:
     54 ; X64:       # %bb.0:
     55 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
     56 ; X64-NEXT:    retq
     57   %1 = insertelement <4 x i8> undef, i8 0, i32 0
     58   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
     59   %3 = insertelement <4 x i8> %2, i8 2, i32 2
     60   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
     61   %5 = sext <4 x i8> %4 to <4 x i32>
     62   ret <4 x i32> %5
     63 }
     64 
     65 define <4 x i32> @test_sext_4i8_4i32_undef() {
     66 ; X32-LABEL: test_sext_4i8_4i32_undef:
     67 ; X32:       # %bb.0:
     68 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
     69 ; X32-NEXT:    retl
     70 ;
     71 ; X64-LABEL: test_sext_4i8_4i32_undef:
     72 ; X64:       # %bb.0:
     73 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
     74 ; X64-NEXT:    retq
     75   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
     76   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
     77   %3 = insertelement <4 x i8> %2, i8 undef, i32 2
     78   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
     79   %5 = sext <4 x i8> %4 to <4 x i32>
     80   ret <4 x i32> %5
     81 }
     82 
     83 define <4 x i64> @test_sext_4i8_4i64() {
     84 ; X32-LABEL: test_sext_4i8_4i64:
     85 ; X32:       # %bb.0:
     86 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,2,0,4294967293,4294967295]
     87 ; X32-NEXT:    retl
     88 ;
     89 ; X64-LABEL: test_sext_4i8_4i64:
     90 ; X64:       # %bb.0:
     91 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,2,18446744073709551613]
     92 ; X64-NEXT:    retq
     93   %1 = insertelement <4 x i8> undef, i8 0, i32 0
     94   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
     95   %3 = insertelement <4 x i8> %2, i8 2, i32 2
     96   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
     97   %5 = sext <4 x i8> %4 to <4 x i64>
     98   ret <4 x i64> %5
     99 }
    100 
    101 define <4 x i64> @test_sext_4i8_4i64_undef() {
    102 ; X32-LABEL: test_sext_4i8_4i64_undef:
    103 ; X32:       # %bb.0:
    104 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,4294967295,4294967295,u,u,4294967293,4294967295>
    105 ; X32-NEXT:    retl
    106 ;
    107 ; X64-LABEL: test_sext_4i8_4i64_undef:
    108 ; X64:       # %bb.0:
    109 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,18446744073709551615,u,18446744073709551613>
    110 ; X64-NEXT:    retq
    111   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
    112   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
    113   %3 = insertelement <4 x i8> %2, i8 undef, i32 2
    114   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
    115   %5 = sext <4 x i8> %4 to <4 x i64>
    116   ret <4 x i64> %5
    117 }
    118 
    119 define <8 x i16> @test_sext_8i8_8i16() {
    120 ; X32-LABEL: test_sext_8i8_8i16:
    121 ; X32:       # %bb.0:
    122 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
    123 ; X32-NEXT:    retl
    124 ;
    125 ; X64-LABEL: test_sext_8i8_8i16:
    126 ; X64:       # %bb.0:
    127 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
    128 ; X64-NEXT:    retq
    129   %1 = insertelement <8 x i8> undef, i8 0, i32 0
    130   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
    131   %3 = insertelement <8 x i8> %2, i8 2, i32 2
    132   %4 = insertelement <8 x i8> %3, i8 -3, i32 3
    133   %5 = insertelement <8 x i8> %4, i8 4, i32 4
    134   %6 = insertelement <8 x i8> %5, i8 -5, i32 5
    135   %7 = insertelement <8 x i8> %6, i8 6, i32 6
    136   %8 = insertelement <8 x i8> %7, i8 -7, i32 7
    137   %9 = sext <8 x i8> %4 to <8 x i16>
    138   ret <8 x i16> %9
    139 }
    140 
    141 define <8 x i32> @test_sext_8i8_8i32() {
    142 ; X32-LABEL: test_sext_8i8_8i32:
    143 ; X32:       # %bb.0:
    144 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,4294967295,2,4294967293,u,u,u,u>
    145 ; X32-NEXT:    retl
    146 ;
    147 ; X64-LABEL: test_sext_8i8_8i32:
    148 ; X64:       # %bb.0:
    149 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,4294967295,2,4294967293,u,u,u,u>
    150 ; X64-NEXT:    retq
    151   %1 = insertelement <8 x i8> undef, i8 0, i32 0
    152   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
    153   %3 = insertelement <8 x i8> %2, i8 2, i32 2
    154   %4 = insertelement <8 x i8> %3, i8 -3, i32 3
    155   %5 = insertelement <8 x i8> %4, i8 4, i32 4
    156   %6 = insertelement <8 x i8> %5, i8 -5, i32 5
    157   %7 = insertelement <8 x i8> %6, i8 6, i32 6
    158   %8 = insertelement <8 x i8> %7, i8 -7, i32 7
    159   %9 = sext <8 x i8> %4 to <8 x i32>
    160   ret <8 x i32> %9
    161 }
    162 
    163 define <8 x i16> @test_sext_8i8_8i16_undef() {
    164 ; X32-LABEL: test_sext_8i8_8i16_undef:
    165 ; X32:       # %bb.0:
    166 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
    167 ; X32-NEXT:    retl
    168 ;
    169 ; X64-LABEL: test_sext_8i8_8i16_undef:
    170 ; X64:       # %bb.0:
    171 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
    172 ; X64-NEXT:    retq
    173   %1 = insertelement <8 x i8> undef, i8 undef, i32 0
    174   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
    175   %3 = insertelement <8 x i8> %2, i8 undef, i32 2
    176   %4 = insertelement <8 x i8> %3, i8 -3, i32 3
    177   %5 = insertelement <8 x i8> %4, i8 undef, i32 4
    178   %6 = insertelement <8 x i8> %5, i8 -5, i32 5
    179   %7 = insertelement <8 x i8> %6, i8 undef, i32 6
    180   %8 = insertelement <8 x i8> %7, i8 -7, i32 7
    181   %9 = sext <8 x i8> %4 to <8 x i16>
    182   ret <8 x i16> %9
    183 }
    184 
    185 define <8 x i32> @test_sext_8i8_8i32_undef() {
    186 ; X32-LABEL: test_sext_8i8_8i32_undef:
    187 ; X32:       # %bb.0:
    188 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,u,u,u,u>
    189 ; X32-NEXT:    retl
    190 ;
    191 ; X64-LABEL: test_sext_8i8_8i32_undef:
    192 ; X64:       # %bb.0:
    193 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,u,u,u,u>
    194 ; X64-NEXT:    retq
    195   %1 = insertelement <8 x i8> undef, i8 0, i32 0
    196   %2 = insertelement <8 x i8> %1, i8 undef, i32 1
    197   %3 = insertelement <8 x i8> %2, i8 2, i32 2
    198   %4 = insertelement <8 x i8> %3, i8 undef, i32 3
    199   %5 = insertelement <8 x i8> %4, i8 4, i32 4
    200   %6 = insertelement <8 x i8> %5, i8 undef, i32 5
    201   %7 = insertelement <8 x i8> %6, i8 6, i32 6
    202   %8 = insertelement <8 x i8> %7, i8 undef, i32 7
    203   %9 = sext <8 x i8> %4 to <8 x i32>
    204   ret <8 x i32> %9
    205 }
    206 
    207 define <4 x i16> @test_zext_4i8_4i16() {
    208 ; X32-LABEL: test_zext_4i8_4i16:
    209 ; X32:       # %bb.0:
    210 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
    211 ; X32-NEXT:    retl
    212 ;
    213 ; X64-LABEL: test_zext_4i8_4i16:
    214 ; X64:       # %bb.0:
    215 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
    216 ; X64-NEXT:    retq
    217   %1 = insertelement <4 x i8> undef, i8 0, i32 0
    218   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
    219   %3 = insertelement <4 x i8> %2, i8 2, i32 2
    220   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
    221   %5 = zext <4 x i8> %4 to <4 x i16>
    222   ret <4 x i16> %5
    223 }
    224 
    225 define <4 x i32> @test_zext_4i8_4i32() {
    226 ; X32-LABEL: test_zext_4i8_4i32:
    227 ; X32:       # %bb.0:
    228 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
    229 ; X32-NEXT:    retl
    230 ;
    231 ; X64-LABEL: test_zext_4i8_4i32:
    232 ; X64:       # %bb.0:
    233 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
    234 ; X64-NEXT:    retq
    235   %1 = insertelement <4 x i8> undef, i8 0, i32 0
    236   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
    237   %3 = insertelement <4 x i8> %2, i8 2, i32 2
    238   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
    239   %5 = zext <4 x i8> %4 to <4 x i32>
    240   ret <4 x i32> %5
    241 }
    242 
    243 define <4 x i64> @test_zext_4i8_4i64() {
    244 ; X32-LABEL: test_zext_4i8_4i64:
    245 ; X32:       # %bb.0:
    246 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,253,0]
    247 ; X32-NEXT:    retl
    248 ;
    249 ; X64-LABEL: test_zext_4i8_4i64:
    250 ; X64:       # %bb.0:
    251 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253]
    252 ; X64-NEXT:    retq
    253   %1 = insertelement <4 x i8> undef, i8 0, i32 0
    254   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
    255   %3 = insertelement <4 x i8> %2, i8 2, i32 2
    256   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
    257   %5 = zext <4 x i8> %4 to <4 x i64>
    258   ret <4 x i64> %5
    259 }
    260 
    261 define <4 x i16> @test_zext_4i8_4i16_undef() {
    262 ; X32-LABEL: test_zext_4i8_4i16_undef:
    263 ; X32:       # %bb.0:
    264 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
    265 ; X32-NEXT:    retl
    266 ;
    267 ; X64-LABEL: test_zext_4i8_4i16_undef:
    268 ; X64:       # %bb.0:
    269 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
    270 ; X64-NEXT:    retq
    271   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
    272   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
    273   %3 = insertelement <4 x i8> %2, i8 undef, i32 2
    274   %4 = insertelement <4 x i8> %3, i8 -3, i32 3
    275   %5 = zext <4 x i8> %4 to <4 x i16>
    276   ret <4 x i16> %5
    277 }
    278 
    279 define <4 x i32> @test_zext_4i8_4i32_undef() {
    280 ; X32-LABEL: test_zext_4i8_4i32_undef:
    281 ; X32:       # %bb.0:
    282 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
    283 ; X32-NEXT:    retl
    284 ;
    285 ; X64-LABEL: test_zext_4i8_4i32_undef:
    286 ; X64:       # %bb.0:
    287 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
    288 ; X64-NEXT:    retq
    289   %1 = insertelement <4 x i8> undef, i8 0, i32 0
    290   %2 = insertelement <4 x i8> %1, i8 undef, i32 1
    291   %3 = insertelement <4 x i8> %2, i8 2, i32 2
    292   %4 = insertelement <4 x i8> %3, i8 undef, i32 3
    293   %5 = zext <4 x i8> %4 to <4 x i32>
    294   ret <4 x i32> %5
    295 }
    296 
    297 define <4 x i64> @test_zext_4i8_4i64_undef() {
    298 ; X32-LABEL: test_zext_4i8_4i64_undef:
    299 ; X32:       # %bb.0:
    300 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,255,0,2,0,u,u>
    301 ; X32-NEXT:    retl
    302 ;
    303 ; X64-LABEL: test_zext_4i8_4i64_undef:
    304 ; X64:       # %bb.0:
    305 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,255,2,u>
    306 ; X64-NEXT:    retq
    307   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
    308   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
    309   %3 = insertelement <4 x i8> %2, i8 2, i32 2
    310   %4 = insertelement <4 x i8> %3, i8 undef, i32 3
    311   %5 = zext <4 x i8> %4 to <4 x i64>
    312   ret <4 x i64> %5
    313 }
    314 
    315 define <8 x i16> @test_zext_8i8_8i16() {
    316 ; X32-LABEL: test_zext_8i8_8i16:
    317 ; X32:       # %bb.0:
    318 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
    319 ; X32-NEXT:    retl
    320 ;
    321 ; X64-LABEL: test_zext_8i8_8i16:
    322 ; X64:       # %bb.0:
    323 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
    324 ; X64-NEXT:    retq
    325   %1 = insertelement <8 x i8> undef, i8 0, i32 0
    326   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
    327   %3 = insertelement <8 x i8> %2, i8 2, i32 2
    328   %4 = insertelement <8 x i8> %3, i8 -3, i32 3
    329   %5 = insertelement <8 x i8> %4, i8 4, i32 4
    330   %6 = insertelement <8 x i8> %5, i8 -5, i32 5
    331   %7 = insertelement <8 x i8> %6, i8 6, i32 6
    332   %8 = insertelement <8 x i8> %7, i8 -7, i32 7
    333   %9 = zext <8 x i8> %8 to <8 x i16>
    334   ret <8 x i16> %9
    335 }
    336 
    337 define <8 x i32> @test_zext_8i8_8i32() {
    338 ; X32-LABEL: test_zext_8i8_8i32:
    339 ; X32:       # %bb.0:
    340 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
    341 ; X32-NEXT:    retl
    342 ;
    343 ; X64-LABEL: test_zext_8i8_8i32:
    344 ; X64:       # %bb.0:
    345 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
    346 ; X64-NEXT:    retq
    347   %1 = insertelement <8 x i8> undef, i8 0, i32 0
    348   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
    349   %3 = insertelement <8 x i8> %2, i8 2, i32 2
    350   %4 = insertelement <8 x i8> %3, i8 -3, i32 3
    351   %5 = insertelement <8 x i8> %4, i8 4, i32 4
    352   %6 = insertelement <8 x i8> %5, i8 -5, i32 5
    353   %7 = insertelement <8 x i8> %6, i8 6, i32 6
    354   %8 = insertelement <8 x i8> %7, i8 -7, i32 7
    355   %9 = zext <8 x i8> %8 to <8 x i32>
    356   ret <8 x i32> %9
    357 }
    358 
    359 define <8 x i16> @test_zext_8i8_8i16_undef() {
    360 ; X32-LABEL: test_zext_8i8_8i16_undef:
    361 ; X32:       # %bb.0:
    362 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
    363 ; X32-NEXT:    retl
    364 ;
    365 ; X64-LABEL: test_zext_8i8_8i16_undef:
    366 ; X64:       # %bb.0:
    367 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
    368 ; X64-NEXT:    retq
    369   %1 = insertelement <8 x i8> undef, i8 undef, i32 0
    370   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
    371   %3 = insertelement <8 x i8> %2, i8 undef, i32 2
    372   %4 = insertelement <8 x i8> %3, i8 -3, i32 3
    373   %5 = insertelement <8 x i8> %4, i8 undef, i32 4
    374   %6 = insertelement <8 x i8> %5, i8 -5, i32 5
    375   %7 = insertelement <8 x i8> %6, i8 undef, i32 6
    376   %8 = insertelement <8 x i8> %7, i8 -7, i32 7
    377   %9 = zext <8 x i8> %8 to <8 x i16>
    378   ret <8 x i16> %9
    379 }
    380 
    381 define <8 x i32> @test_zext_8i8_8i32_undef() {
    382 ; X32-LABEL: test_zext_8i8_8i32_undef:
    383 ; X32:       # %bb.0:
    384 ; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
    385 ; X32-NEXT:    retl
    386 ;
    387 ; X64-LABEL: test_zext_8i8_8i32_undef:
    388 ; X64:       # %bb.0:
    389 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
    390 ; X64-NEXT:    retq
    391   %1 = insertelement <8 x i8> undef, i8 0, i32 0
    392   %2 = insertelement <8 x i8> %1, i8 undef, i32 1
    393   %3 = insertelement <8 x i8> %2, i8 2, i32 2
    394   %4 = insertelement <8 x i8> %3, i8 -3, i32 3
    395   %5 = insertelement <8 x i8> %4, i8 4, i32 4
    396   %6 = insertelement <8 x i8> %5, i8 undef, i32 5
    397   %7 = insertelement <8 x i8> %6, i8 6, i32 6
    398   %8 = insertelement <8 x i8> %7, i8 undef, i32 7
    399   %9 = zext <8 x i8> %8 to <8 x i32>
    400   ret <8 x i32> %9
    401 }
    402