; This is a smoke test of nop insertion.

; REQUIRES: allow_dump

; Use filetype=asm because this currently depends on the /* variant */
; assembler comment.

; Every invocation below passes -sz-seed=1. The expected nop positions and
; variant numbers in each check group are presumably tied to that seed together
; with the percentage / max-nops flags of the matching invocation, so the flags
; and the expectations have to be updated together.

; 50% insertion probability, at most one nop per instruction.
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN:   -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
; RUN:   | FileCheck %s --check-prefix=PROB50

; 90% insertion probability, at most one nop per instruction.
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN:   -nop-insertion-percentage=90 -max-nops-per-instruction=1 \
; RUN:   | FileCheck %s --check-prefix=PROB90

; 50% insertion probability, up to two nops per instruction.
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN:   -nop-insertion-percentage=50 -max-nops-per-instruction=2 \
; RUN:   | FileCheck %s --check-prefix=MAXNOPS2

; Same settings as the first invocation, with --sandbox added.
; RUN: %p2i -i %s --filetype=asm --sandbox -a -sz-seed=1 -nop-insertion \
; RUN:   -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
; RUN:   | FileCheck %s --check-prefix=SANDBOX50

; Sandboxed arm32 with a percentage above 100 and up to two nops per
; instruction; the expected output has exactly two nops after every
; instruction or bundle-locked group.
; RUN: %p2i -i %s --filetype=asm --sandbox --target=arm32 -a -sz-seed=1 \
; RUN:   -nop-insertion -nop-insertion-percentage=110 \
; RUN:   -max-nops-per-instruction=2 \
; RUN:   | FileCheck %s --check-prefix=ARM110P2


; Multiplies the two <4 x i32> arguments with a single vector mul and returns
; the product. The check groups below live inside the function body, between
; the ret and the closing brace.
define internal <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
entry:
  %res = mul <4 x i32> %a, %b
  ret <4 x i32> %res

; PROB50-LABEL: mul_v4i32
; PROB50: nop /* variant = 1 */
; PROB50: subl $60, %esp
; PROB50: nop /* variant = 3 */
; PROB50: movups %xmm0, 32(%esp)
; PROB50: movups %xmm1, 16(%esp)
; PROB50: movups 32(%esp), %xmm0
; PROB50: nop /* variant = 1 */
; PROB50: pshufd $49, 32(%esp), %xmm1
; PROB50: nop /* variant = 4 */
; PROB50: pshufd $49, 16(%esp), %xmm2
; PROB50: nop /* variant = 1 */
; PROB50: pmuludq 16(%esp), %xmm0
; PROB50: pmuludq %xmm2, %xmm1
; PROB50: nop /* variant = 0 */
; PROB50: shufps $136, %xmm1, %xmm0
; PROB50: nop /* variant = 3 */
; PROB50: pshufd $216, %xmm0, %xmm0
; PROB50: nop /* variant = 1 */
; PROB50: movups %xmm0, (%esp)
; PROB50: movups (%esp), %xmm0
; PROB50: addl $60, %esp
; PROB50: ret

; PROB90-LABEL: mul_v4i32
; PROB90: nop /* variant = 1 */
; PROB90: subl $60, %esp
; PROB90: nop /* variant = 3 */
; PROB90: movups %xmm0, 32(%esp)
; PROB90: nop /* variant = 4 */
; PROB90: movups %xmm1, 16(%esp)
; PROB90: nop /* variant = 1 */
; PROB90: movups 32(%esp), %xmm0
; PROB90: nop /* variant = 4 */
; PROB90: pshufd $49, 32(%esp), %xmm1
; PROB90: nop /* variant = 1 */
; PROB90: pshufd $49, 16(%esp), %xmm2
; PROB90: nop /* variant = 4 */
; PROB90: pmuludq 16(%esp), %xmm0
; PROB90: nop /* variant = 2 */
; PROB90: pmuludq %xmm2, %xmm1
; PROB90: shufps $136, %xmm1, %xmm0
; PROB90: nop /* variant = 1 */
; PROB90: pshufd $216, %xmm0, %xmm0
; PROB90: movups %xmm0, (%esp)
; PROB90: nop /* variant = 1 */
; PROB90: movups (%esp), %xmm0
; PROB90: nop /* variant = 0 */
; PROB90: addl $60, %esp
; PROB90: nop /* variant = 0 */
; PROB90: ret
; PROB90: nop /* variant = 4 */

; MAXNOPS2-LABEL: mul_v4i32
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: subl $60, %esp
; MAXNOPS2: movups %xmm0, 32(%esp)
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: nop /* variant = 4 */
; MAXNOPS2: movups %xmm1, 16(%esp)
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: movups 32(%esp), %xmm0
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: pshufd $49, 32(%esp), %xmm1
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: pshufd $49, 16(%esp), %xmm2
; MAXNOPS2: pmuludq 16(%esp), %xmm0
; MAXNOPS2: pmuludq %xmm2, %xmm1
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: shufps $136, %xmm1, %xmm0
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: pshufd $216, %xmm0, %xmm0
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: movups %xmm0, (%esp)
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: movups (%esp), %xmm0
; MAXNOPS2: addl $60, %esp
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: ret


; SANDBOX50-LABEL: mul_v4i32
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: subl $60, %esp
; SANDBOX50: nop /* variant = 3 */
; SANDBOX50: movups %xmm0, 32(%esp)
; SANDBOX50: movups %xmm1, 16(%esp)
; SANDBOX50: movups 32(%esp), %xmm0
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: pshufd $49, 32(%esp), %xmm1
; SANDBOX50: nop /* variant = 4 */
; SANDBOX50: pshufd $49, 16(%esp), %xmm2
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: pmuludq 16(%esp), %xmm0
; SANDBOX50: pmuludq %xmm2, %xmm1
; SANDBOX50: nop /* variant = 0 */
; SANDBOX50: shufps $136, %xmm1, %xmm0
; SANDBOX50: nop /* variant = 3 */
; SANDBOX50: pshufd $216, %xmm0, %xmm0
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: movups %xmm0, (%esp)
; SANDBOX50: movups (%esp), %xmm0
; SANDBOX50: addl $60, %esp
; SANDBOX50: pop %ecx
; SANDBOX50: .bundle_lock
; SANDBOX50: andl $-32, %ecx
; SANDBOX50: jmp *%ecx
; SANDBOX50: .bundle_unlock

; ARM110P2: mul_v4i32:
; ARM110P2-NEXT: .Lmul_v4i32$entry:
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: sub sp, sp, #48
; ARM110P2-NEXT: bic sp, sp, #3221225472
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: add ip, sp, #32
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vst1.32 q0, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: # [sp, #32] = def.pseudo
; ARM110P2-NEXT: add ip, sp, #16
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vst1.32 q1, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: # [sp, #16] = def.pseudo
; ARM110P2-NEXT: add ip, sp, #32
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vld1.32 q0, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: add ip, sp, #16
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vld1.32 q1, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: vmul.i32 q0, q0, q1
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: vst1.32 q0, [sp]
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: # [sp] = def.pseudo
; ARM110P2-NEXT: vld1.32 q0, [sp]
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: add sp, sp, #48
; ARM110P2-NEXT: bic sp, sp, #3221225472
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic lr, lr, #3221225487
; ARM110P2-NEXT: bx lr
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop

}