Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
      4 
      5 ; This test makes sure that the compiler does not crash with an
      6 ; assertion failure when trying to fold a vector shift left
      7 ; by immediate count if the type of the input vector is different
      8 ; to the result type.
      9 ;
     10 ; This happens for example when lowering a shift left of a MVT::v16i8 vector.
     11 ; This is custom lowered into the following sequence:
     12 ;     count << 5
     13 ;     A =  VSHLI(MVT::v8i16, r & (char16)15, 4)
     14 ;     B = BITCAST MVT::v16i8, A
     15 ;     VSELECT(r, B, count);
     16 ;     count += count
     17 ;     C = VSHLI(MVT::v8i16, r & (char16)63, 2)
     18 ;     D = BITCAST MVT::v16i8, C
     19 ;     r = VSELECT(r, D, count);
     20 ;     count += count
     21 ;     VSELECT(r, r+r, count);
     22 ;     count = count << 5;
     23 ;
     24 ; Where 'r' is a vector of type MVT::v16i8, and
     25 ; 'count' is the vector shift count.
     26 
     27 define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
     28 ; X32-LABEL: do_not_crash:
     29 ; X32:       # %bb.0: # %entry
     30 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     31 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     32 ; X32-NEXT:    movb %al, (%ecx)
     33 ; X32-NEXT:    movd %eax, %xmm0
     34 ; X32-NEXT:    psllq $56, %xmm0
     35 ; X32-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
     36 ; X32-NEXT:    movdqa %xmm2, %xmm1
     37 ; X32-NEXT:    pandn %xmm0, %xmm1
     38 ; X32-NEXT:    por %xmm2, %xmm1
     39 ; X32-NEXT:    pcmpeqd %xmm2, %xmm2
     40 ; X32-NEXT:    psllw $5, %xmm1
     41 ; X32-NEXT:    pxor %xmm3, %xmm3
     42 ; X32-NEXT:    pxor %xmm0, %xmm0
     43 ; X32-NEXT:    pcmpgtb %xmm1, %xmm0
     44 ; X32-NEXT:    pxor %xmm0, %xmm2
     45 ; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
     46 ; X32-NEXT:    por %xmm2, %xmm0
     47 ; X32-NEXT:    paddb %xmm1, %xmm1
     48 ; X32-NEXT:    pxor %xmm2, %xmm2
     49 ; X32-NEXT:    pcmpgtb %xmm1, %xmm2
     50 ; X32-NEXT:    movdqa %xmm2, %xmm4
     51 ; X32-NEXT:    pandn %xmm0, %xmm4
     52 ; X32-NEXT:    psllw $2, %xmm0
     53 ; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
     54 ; X32-NEXT:    pand %xmm2, %xmm0
     55 ; X32-NEXT:    por %xmm4, %xmm0
     56 ; X32-NEXT:    paddb %xmm1, %xmm1
     57 ; X32-NEXT:    pcmpgtb %xmm1, %xmm3
     58 ; X32-NEXT:    movdqa %xmm3, %xmm1
     59 ; X32-NEXT:    pandn %xmm0, %xmm1
     60 ; X32-NEXT:    paddb %xmm0, %xmm0
     61 ; X32-NEXT:    pand %xmm3, %xmm0
     62 ; X32-NEXT:    por %xmm1, %xmm0
     63 ; X32-NEXT:    retl
     64 ;
     65 ; X64-LABEL: do_not_crash:
     66 ; X64:       # %bb.0: # %entry
     67 ; X64-NEXT:    movb %r9b, (%rdi)
     68 ; X64-NEXT:    movd %r9d, %xmm0
     69 ; X64-NEXT:    psllq $56, %xmm0
     70 ; X64-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
     71 ; X64-NEXT:    movdqa %xmm2, %xmm1
     72 ; X64-NEXT:    pandn %xmm0, %xmm1
     73 ; X64-NEXT:    por %xmm2, %xmm1
     74 ; X64-NEXT:    pcmpeqd %xmm2, %xmm2
     75 ; X64-NEXT:    psllw $5, %xmm1
     76 ; X64-NEXT:    pxor %xmm3, %xmm3
     77 ; X64-NEXT:    pxor %xmm0, %xmm0
     78 ; X64-NEXT:    pcmpgtb %xmm1, %xmm0
     79 ; X64-NEXT:    pxor %xmm0, %xmm2
     80 ; X64-NEXT:    pand {{.*}}(%rip), %xmm0
     81 ; X64-NEXT:    por %xmm2, %xmm0
     82 ; X64-NEXT:    paddb %xmm1, %xmm1
     83 ; X64-NEXT:    pxor %xmm2, %xmm2
     84 ; X64-NEXT:    pcmpgtb %xmm1, %xmm2
     85 ; X64-NEXT:    movdqa %xmm2, %xmm4
     86 ; X64-NEXT:    pandn %xmm0, %xmm4
     87 ; X64-NEXT:    psllw $2, %xmm0
     88 ; X64-NEXT:    pand {{.*}}(%rip), %xmm0
     89 ; X64-NEXT:    pand %xmm2, %xmm0
     90 ; X64-NEXT:    por %xmm4, %xmm0
     91 ; X64-NEXT:    paddb %xmm1, %xmm1
     92 ; X64-NEXT:    pcmpgtb %xmm1, %xmm3
     93 ; X64-NEXT:    movdqa %xmm3, %xmm1
     94 ; X64-NEXT:    pandn %xmm0, %xmm1
     95 ; X64-NEXT:    paddb %xmm0, %xmm0
     96 ; X64-NEXT:    pand %xmm3, %xmm0
     97 ; X64-NEXT:    por %xmm1, %xmm0
     98 ; X64-NEXT:    retq
; NOTE(review): the CHECK lines above are autogenerated (see the NOTE at the
; top of the file) — regenerate them with update_llc_test_checks.py instead of
; editing by hand if the IR below ever changes.
     99 entry:
    100   store i8 %5, i8* %0 ; spill the i8 argument to memory ...
    101   %L5 = load i8, i8* %0 ; ... and reload it, so %L5 is not a trivially-known constant
    102   %I8 = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %L5, i32 7 ; shift-amount vector: all-ones except lane 7 = %L5 (non-uniform on purpose)
    103   %B51 = shl <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, %I8 ; the v16i8 vector shl whose custom lowering used to hit the assertion described above
    104   ret <16 x i8> %B51
    105 }
    106