Home | History | Annotate | Download | only in llvm2ice_tests
      1 ; This is a smoke test of nop insertion.
      2 
      3 ; REQUIRES: allow_dump
      4 
      5 ; Use filetype=asm because this currently depends on the /* variant */
      6 ; assembler comment.
      7 
      8 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
      9 ; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
     10 ; RUN:    | FileCheck %s --check-prefix=PROB50
     11 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
     12 ; RUN:    -nop-insertion-percentage=90 -max-nops-per-instruction=1 \
     13 ; RUN:    | FileCheck %s --check-prefix=PROB90
     14 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
     15 ; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=2 \
     16 ; RUN:    | FileCheck %s --check-prefix=MAXNOPS2
     17 ; RUN: %p2i -i %s --filetype=asm --sandbox -a -sz-seed=1 -nop-insertion \
     18 ; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
     19 ; RUN:    | FileCheck %s --check-prefix=SANDBOX50
     20 ; RUN: %p2i -i %s --filetype=asm --sandbox --target=arm32 -a -sz-seed=1 \
     21 ; RUN:    -nop-insertion -nop-insertion-percentage=110 \
     22 ; RUN:    -max-nops-per-instruction=2 \
     23 ; RUN:    | FileCheck %s --check-prefix=ARM110P2
     24 
     25 
     26 define internal <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
        ; Test function: element-wise multiply of the two <4 x i32> arguments,
        ; returning the product. The check groups below pin the assembly that is
        ; expected, including where nops appear, for each command line at the
        ; top of this file (all of them pass -sz-seed=1).
     27 entry:
     28   %res = mul <4 x i32> %a, %b
     29   ret <4 x i32> %res
     30 
        ; PROB50 group: -nop-insertion-percentage=50 with
        ; -max-nops-per-instruction=1.
     31 ; PROB50-LABEL: mul_v4i32
     32 ; PROB50: nop /* variant = 1 */
     33 ; PROB50: subl $60, %esp
     34 ; PROB50: nop /* variant = 3 */
     35 ; PROB50: movups %xmm0, 32(%esp)
     36 ; PROB50: movups %xmm1, 16(%esp)
     37 ; PROB50: movups 32(%esp), %xmm0
     38 ; PROB50: nop /* variant = 1 */
     39 ; PROB50: pshufd $49, 32(%esp), %xmm1
     40 ; PROB50: nop /* variant = 4 */
     41 ; PROB50: pshufd $49, 16(%esp), %xmm2
     42 ; PROB50: nop /* variant = 1 */
     43 ; PROB50: pmuludq 16(%esp), %xmm0
     44 ; PROB50: pmuludq %xmm2, %xmm1
     45 ; PROB50: nop /* variant = 0 */
     46 ; PROB50: shufps $136, %xmm1, %xmm0
     47 ; PROB50: nop /* variant = 3 */
     48 ; PROB50: pshufd $216, %xmm0, %xmm0
     49 ; PROB50: nop /* variant = 1 */
     50 ; PROB50: movups %xmm0, (%esp)
     51 ; PROB50: movups (%esp), %xmm0
     52 ; PROB50: addl $60, %esp
     53 ; PROB50: ret
     54 
        ; PROB90 group: percentage raised to 90, still at most one nop per
        ; instruction. The expected output ends with a nop after the ret.
     55 ; PROB90-LABEL: mul_v4i32
     56 ; PROB90: nop /* variant = 1 */
     57 ; PROB90: subl $60, %esp
     58 ; PROB90: nop /* variant = 3 */
     59 ; PROB90: movups %xmm0, 32(%esp)
     60 ; PROB90: nop /* variant = 4 */
     61 ; PROB90: movups %xmm1, 16(%esp)
     62 ; PROB90: nop /* variant = 1 */
     63 ; PROB90: movups 32(%esp), %xmm0
     64 ; PROB90: nop /* variant = 4 */
     65 ; PROB90: pshufd $49, 32(%esp), %xmm1
     66 ; PROB90: nop /* variant = 1 */
     67 ; PROB90: pshufd $49, 16(%esp), %xmm2
     68 ; PROB90: nop /* variant = 4 */
     69 ; PROB90: pmuludq 16(%esp), %xmm0
     70 ; PROB90: nop /* variant = 2 */
     71 ; PROB90: pmuludq %xmm2, %xmm1
     72 ; PROB90: shufps $136, %xmm1, %xmm0
     73 ; PROB90: nop /* variant = 1 */
     74 ; PROB90: pshufd $216, %xmm0, %xmm0
     75 ; PROB90: movups %xmm0, (%esp)
     76 ; PROB90: nop /* variant = 1 */
     77 ; PROB90: movups (%esp), %xmm0
     78 ; PROB90: nop /* variant = 0 */
     79 ; PROB90: addl $60, %esp
     80 ; PROB90: nop /* variant = 0 */
     81 ; PROB90: ret
     82 ; PROB90: nop /* variant = 4 */
     83 
        ; MAXNOPS2 group: percentage=50 with -max-nops-per-instruction=2, so the
        ; expected output contains runs of up to two consecutive nops.
     84 ; MAXNOPS2-LABEL: mul_v4i32
     85 ; MAXNOPS2: nop /* variant = 1 */
     86 ; MAXNOPS2: nop /* variant = 3 */
     87 ; MAXNOPS2: subl $60, %esp
     88 ; MAXNOPS2: movups %xmm0, 32(%esp)
     89 ; MAXNOPS2: nop /* variant = 1 */
     90 ; MAXNOPS2: nop /* variant = 4 */
     91 ; MAXNOPS2: movups %xmm1, 16(%esp)
     92 ; MAXNOPS2: nop /* variant = 1 */
     93 ; MAXNOPS2: movups 32(%esp), %xmm0
     94 ; MAXNOPS2: nop /* variant = 0 */
     95 ; MAXNOPS2: nop /* variant = 3 */
     96 ; MAXNOPS2: pshufd $49, 32(%esp), %xmm1
     97 ; MAXNOPS2: nop /* variant = 1 */
     98 ; MAXNOPS2: pshufd $49, 16(%esp), %xmm2
     99 ; MAXNOPS2: pmuludq 16(%esp), %xmm0
    100 ; MAXNOPS2: pmuludq %xmm2, %xmm1
    101 ; MAXNOPS2: nop /* variant = 0 */
    102 ; MAXNOPS2: shufps $136, %xmm1, %xmm0
    103 ; MAXNOPS2: nop /* variant = 0 */
    104 ; MAXNOPS2: nop /* variant = 0 */
    105 ; MAXNOPS2: pshufd $216, %xmm0, %xmm0
    106 ; MAXNOPS2: nop /* variant = 1 */
    107 ; MAXNOPS2: nop /* variant = 3 */
    108 ; MAXNOPS2: movups %xmm0, (%esp)
    109 ; MAXNOPS2: nop /* variant = 3 */
    110 ; MAXNOPS2: movups (%esp), %xmm0
    111 ; MAXNOPS2: addl $60, %esp
    112 ; MAXNOPS2: nop /* variant = 3 */
    113 ; MAXNOPS2: ret
    114 
    115 
        ; SANDBOX50 group: same percentage and max-nops options as the PROB50
        ; group, plus --sandbox. The expected lines through the addl match that
        ; group; the function end is then checked as pop %ecx followed by a
        ; bundle-locked andl/jmp pair, where the other x86 groups check for ret.
    116 ; SANDBOX50-LABEL: mul_v4i32
    117 ; SANDBOX50: nop /* variant = 1 */
    118 ; SANDBOX50: subl $60, %esp
    119 ; SANDBOX50: nop /* variant = 3 */
    120 ; SANDBOX50: movups %xmm0, 32(%esp)
    121 ; SANDBOX50: movups %xmm1, 16(%esp)
    122 ; SANDBOX50: movups 32(%esp), %xmm0
    123 ; SANDBOX50: nop /* variant = 1 */
    124 ; SANDBOX50: pshufd $49, 32(%esp), %xmm1
    125 ; SANDBOX50: nop /* variant = 4 */
    126 ; SANDBOX50: pshufd $49, 16(%esp), %xmm2
    127 ; SANDBOX50: nop /* variant = 1 */
    128 ; SANDBOX50: pmuludq 16(%esp), %xmm0
    129 ; SANDBOX50: pmuludq %xmm2, %xmm1
    130 ; SANDBOX50: nop /* variant = 0 */
    131 ; SANDBOX50: shufps $136, %xmm1, %xmm0
    132 ; SANDBOX50: nop /* variant = 3 */
    133 ; SANDBOX50: pshufd $216, %xmm0, %xmm0
    134 ; SANDBOX50: nop /* variant = 1 */
    135 ; SANDBOX50: movups %xmm0, (%esp)
    136 ; SANDBOX50: movups (%esp), %xmm0
    137 ; SANDBOX50: addl $60, %esp
    138 ; SANDBOX50: pop %ecx
    139 ; SANDBOX50: .bundle_lock
    140 ; SANDBOX50: andl $-32, %ecx
    141 ; SANDBOX50: jmp *%ecx
    142 ; SANDBOX50: .bundle_unlock
    143 
        ; ARM110P2 group: --sandbox --target=arm32 with percentage=110 and
        ; -max-nops-per-instruction=2. Every check after the first uses the
        ; -NEXT form, and two nops are expected after each instruction or
        ; bundle-locked group.
        ; NOTE(review): a percentage above 100 presumably forces insertion at
        ; every opportunity - confirm against the option's implementation.
    144 ; ARM110P2:       mul_v4i32:
    145 ; ARM110P2-NEXT: .Lmul_v4i32$entry:
    146 ; ARM110P2-NEXT:        .bundle_lock
    147 ; ARM110P2-NEXT:        sub     sp, sp, #48
    148 ; ARM110P2-NEXT:        bic     sp, sp, #3221225472
    149 ; ARM110P2-NEXT:        .bundle_unlock
    150 ; ARM110P2-NEXT:        nop
    151 ; ARM110P2-NEXT:        nop
    152 ; ARM110P2-NEXT:        add     ip, sp, #32
    153 ; ARM110P2-NEXT:        nop
    154 ; ARM110P2-NEXT:        nop
    155 ; ARM110P2-NEXT:        .bundle_lock
    156 ; ARM110P2-NEXT:        bic     ip, ip, #3221225472
    157 ; ARM110P2-NEXT:        vst1.32 q0, [ip]
    158 ; ARM110P2-NEXT:        .bundle_unlock
    159 ; ARM110P2-NEXT:        nop
    160 ; ARM110P2-NEXT:        nop
    161 ; ARM110P2-NEXT:        # [sp, #32] = def.pseudo
    162 ; ARM110P2-NEXT:        add     ip, sp, #16
    163 ; ARM110P2-NEXT:        nop
    164 ; ARM110P2-NEXT:        nop
    165 ; ARM110P2-NEXT:        .bundle_lock
    166 ; ARM110P2-NEXT:        bic     ip, ip, #3221225472
    167 ; ARM110P2-NEXT:        vst1.32 q1, [ip]
    168 ; ARM110P2-NEXT:        .bundle_unlock
    169 ; ARM110P2-NEXT:        nop
    170 ; ARM110P2-NEXT:        nop
    171 ; ARM110P2-NEXT:        # [sp, #16] = def.pseudo
    172 ; ARM110P2-NEXT:        add     ip, sp, #32
    173 ; ARM110P2-NEXT:        nop
    174 ; ARM110P2-NEXT:        nop
    175 ; ARM110P2-NEXT:        .bundle_lock
    176 ; ARM110P2-NEXT:        bic     ip, ip, #3221225472
    177 ; ARM110P2-NEXT:        vld1.32 q0, [ip]
    178 ; ARM110P2-NEXT:        .bundle_unlock
    179 ; ARM110P2-NEXT:        nop
    180 ; ARM110P2-NEXT:        nop
    181 ; ARM110P2-NEXT:        add     ip, sp, #16
    182 ; ARM110P2-NEXT:        nop
    183 ; ARM110P2-NEXT:        nop
    184 ; ARM110P2-NEXT:        .bundle_lock
    185 ; ARM110P2-NEXT:        bic     ip, ip, #3221225472
    186 ; ARM110P2-NEXT:        vld1.32 q1, [ip]
    187 ; ARM110P2-NEXT:        .bundle_unlock
    188 ; ARM110P2-NEXT:        nop
    189 ; ARM110P2-NEXT:        nop
    190 ; ARM110P2-NEXT:        vmul.i32        q0, q0, q1
    191 ; ARM110P2-NEXT:        nop
    192 ; ARM110P2-NEXT:        nop
    193 ; ARM110P2-NEXT:        vst1.32 q0, [sp]
    194 ; ARM110P2-NEXT:        nop
    195 ; ARM110P2-NEXT:        nop
    196 ; ARM110P2-NEXT:        # [sp] = def.pseudo
    197 ; ARM110P2-NEXT:        vld1.32 q0, [sp]
    198 ; ARM110P2-NEXT:        nop
    199 ; ARM110P2-NEXT:        nop
    200 ; ARM110P2-NEXT:        .bundle_lock
    201 ; ARM110P2-NEXT:        add     sp, sp, #48
    202 ; ARM110P2-NEXT:        bic     sp, sp, #3221225472
    203 ; ARM110P2-NEXT:        .bundle_unlock
    204 ; ARM110P2-NEXT:        nop
    205 ; ARM110P2-NEXT:        nop
    206 ; ARM110P2-NEXT:        .bundle_lock
    207 ; ARM110P2-NEXT:        bic     lr, lr, #3221225487
    208 ; ARM110P2-NEXT:        bx      lr
    209 ; ARM110P2-NEXT:        .bundle_unlock
    210 ; ARM110P2-NEXT:        nop
    211 ; ARM110P2-NEXT:        nop
    212 
    213 }
    214