; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4

; Every function below applies one TBM bit-manipulation idiom twice: once to
; the register argument %a0 and once to the value loaded through %p1. The two
; results are added together and returned, so both the register form and the
; memory-operand form of the instruction appear in the expected assembly.

; Bit-field extract, 32-bit: (x >> 4) & 4095.
define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_bextri_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; GENERIC-NEXT: # sched: [2:1.00]
; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; GENERIC-NEXT: # sched: [7:1.00]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_bextri_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.shr = lshr i32 %a0, 4
  %mem.shr = lshr i32 %ld, 4
  %reg.bits = and i32 %reg.shr, 4095
  %mem.bits = and i32 %mem.shr, 4095
  %sum = add i32 %reg.bits, %mem.bits
  ret i32 %sum
}

; Bit-field extract, 64-bit: (x >> 4) & 4095.
define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_bextri_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; GENERIC-NEXT: # sched: [2:1.00]
; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; GENERIC-NEXT: # sched: [7:1.00]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_bextri_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.shr = lshr i64 %a0, 4
  %mem.shr = lshr i64 %ld, 4
  %reg.bits = and i64 %reg.shr, 4095
  %mem.bits = and i64 %mem.shr, 4095
  %sum = add i64 %reg.bits, %mem.bits
  ret i64 %sum
}

; BLCFILL idiom, 32-bit: (x + 1) & x.
define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcfill_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcfilll %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcfill_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcfilll %edi, %ecx
; BDVER-NEXT: blcfilll (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.fill = and i32 %reg.inc, %a0
  %mem.fill = and i32 %mem.inc, %ld
  %sum = add i32 %reg.fill, %mem.fill
  ret i32 %sum
}

; BLCFILL idiom, 64-bit: (x + 1) & x.
define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcfill_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcfill_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcfillq %rdi, %rcx
; BDVER-NEXT: blcfillq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.fill = and i64 %reg.inc, %a0
  %mem.fill = and i64 %mem.inc, %ld
  %sum = add i64 %reg.fill, %mem.fill
  ret i64 %sum
}

; BLCI idiom, 32-bit: ~(1 + x) | x.
define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blci_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcil %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blci_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcil %edi, %ecx
; BDVER-NEXT: blcil (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 1, %a0
  %mem.inc = add i32 1, %ld
  %reg.not = xor i32 %reg.inc, -1
  %mem.not = xor i32 %mem.inc, -1
  %reg.blci = or i32 %reg.not, %a0
  %mem.blci = or i32 %mem.not, %ld
  %sum = add i32 %reg.blci, %mem.blci
  ret i32 %sum
}

; BLCI idiom, 64-bit: ~(1 + x) | x.
define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blci_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blciq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blci_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blciq %rdi, %rcx
; BDVER-NEXT: blciq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 1, %a0
  %mem.inc = add i64 1, %ld
  %reg.not = xor i64 %reg.inc, -1
  %mem.not = xor i64 %mem.inc, -1
  %reg.blci = or i64 %reg.not, %a0
  %mem.blci = or i64 %mem.not, %ld
  %sum = add i64 %reg.blci, %mem.blci
  ret i64 %sum
}

; BLCIC idiom, 32-bit: (x + 1) & ~x.
define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcic_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcicl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcic_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcicl %edi, %ecx
; BDVER-NEXT: blcicl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.blcic = and i32 %reg.inc, %reg.not
  %mem.blcic = and i32 %mem.inc, %mem.not
  %sum = add i32 %reg.blcic, %mem.blcic
  ret i32 %sum
}

; BLCIC idiom, 64-bit: (x + 1) & ~x.
define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcic_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcicq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcic_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcicq %rdi, %rcx
; BDVER-NEXT: blcicq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.blcic = and i64 %reg.inc, %reg.not
  %mem.blcic = and i64 %mem.inc, %mem.not
  %sum = add i64 %reg.blcic, %mem.blcic
  ret i64 %sum
}

; BLCMSK idiom, 32-bit: (x + 1) ^ x.
define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcmsk_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcmskl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcmskl %edi, %ecx
; BDVER-NEXT: blcmskl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.msk = xor i32 %reg.inc, %a0
  %mem.msk = xor i32 %mem.inc, %ld
  %sum = add i32 %reg.msk, %mem.msk
  ret i32 %sum
}

; BLCMSK idiom, 64-bit: (x + 1) ^ x.
define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcmsk_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcmskq %rdi, %rcx
; BDVER-NEXT: blcmskq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.msk = xor i64 %reg.inc, %a0
  %mem.msk = xor i64 %mem.inc, %ld
  %sum = add i64 %reg.msk, %mem.msk
  ret i64 %sum
}

; BLCS idiom, 32-bit: (x + 1) | x.
define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcs_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcsl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcs_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcsl %edi, %ecx
; BDVER-NEXT: blcsl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.set = or i32 %reg.inc, %a0
  %mem.set = or i32 %mem.inc, %ld
  %sum = add i32 %reg.set, %mem.set
  ret i32 %sum
}

; BLCS idiom, 64-bit: (x + 1) | x.
define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcs_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blcsq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcs_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcsq %rdi, %rcx
; BDVER-NEXT: blcsq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.set = or i64 %reg.inc, %a0
  %mem.set = or i64 %mem.inc, %ld
  %sum = add i64 %reg.set, %mem.set
  ret i64 %sum
}
; BLSFILL idiom, 32-bit: (x - 1) | x. As in every function of this group, the
; idiom is applied to the register argument %a0 and to the value loaded
; through %p1, and the two results are summed.
define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsfill_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsfilll %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsfill_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blsfilll %edi, %ecx
; BDVER-NEXT: blsfilll (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.dec = add i32 %a0, -1
  %mem.dec = add i32 %ld, -1
  %reg.fill = or i32 %reg.dec, %a0
  %mem.fill = or i32 %mem.dec, %ld
  %sum = add i32 %reg.fill, %mem.fill
  ret i32 %sum
}

; BLSFILL idiom, 64-bit: (x - 1) | x.
define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsfill_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsfill_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blsfillq %rdi, %rcx
; BDVER-NEXT: blsfillq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.dec = add i64 %a0, -1
  %mem.dec = add i64 %ld, -1
  %reg.fill = or i64 %reg.dec, %a0
  %mem.fill = or i64 %mem.dec, %ld
  %sum = add i64 %reg.fill, %mem.fill
  ret i64 %sum
}

; BLSIC idiom, 32-bit: ~x | (x - 1).
define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsic_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsicl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsic_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blsicl %edi, %ecx
; BDVER-NEXT: blsicl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.dec = add i32 %a0, -1
  %mem.dec = add i32 %ld, -1
  %reg.blsic = or i32 %reg.not, %reg.dec
  %mem.blsic = or i32 %mem.not, %mem.dec
  %sum = add i32 %reg.blsic, %mem.blsic
  ret i32 %sum
}

; BLSIC idiom, 64-bit: ~x | (x - 1).
define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsic_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: blsicq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsic_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blsicq %rdi, %rcx
; BDVER-NEXT: blsicq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.dec = add i64 %a0, -1
  %mem.dec = add i64 %ld, -1
  %reg.blsic = or i64 %reg.not, %reg.dec
  %mem.blsic = or i64 %mem.not, %mem.dec
  %sum = add i64 %reg.blsic, %mem.blsic
  ret i64 %sum
}

; T1MSKC idiom, 32-bit: ~x | (x + 1).
define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_t1mskc_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: t1mskcl %edi, %ecx
; BDVER-NEXT: t1mskcl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.msk = or i32 %reg.not, %reg.inc
  %mem.msk = or i32 %mem.not, %mem.inc
  %sum = add i32 %reg.msk, %mem.msk
  ret i32 %sum
}

; T1MSKC idiom, 64-bit: ~x | (x + 1).
define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_t1mskc_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: t1mskcq %rdi, %rcx
; BDVER-NEXT: t1mskcq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.msk = or i64 %reg.not, %reg.inc
  %mem.msk = or i64 %mem.not, %mem.inc
  %sum = add i64 %reg.msk, %mem.msk
  ret i64 %sum
}

; TZMSK idiom, 32-bit: ~x & (x - 1).
define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_tzmsk_u32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: tzmskl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: tzmskl %edi, %ecx
; BDVER-NEXT: tzmskl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.dec = add i32 %a0, -1
  %mem.dec = add i32 %ld, -1
  %reg.msk = and i32 %reg.not, %reg.dec
  %mem.msk = and i32 %mem.not, %mem.dec
  %sum = add i32 %reg.msk, %mem.msk
  ret i32 %sum
}

; TZMSK idiom, 64-bit: ~x & (x - 1).
define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_tzmsk_u64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: tzmskq %rdi, %rcx
; BDVER-NEXT: tzmskq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.dec = add i64 %a0, -1
  %mem.dec = add i64 %ld, -1
  %reg.msk = and i64 %reg.not, %reg.dec
  %mem.msk = and i64 %mem.not, %mem.dec
  %sum = add i64 %reg.msk, %mem.msk
  ret i64 %sum
}