Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
      6 
      7 define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes (x >> 4) & 4095 for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one bextrl per input
      8 ; GENERIC-LABEL: test_x86_tbm_bextri_u32:
      9 ; GENERIC:       # %bb.0:
     10 ; GENERIC-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
     11 ; GENERIC-NEXT:    # sched: [2:1.00]
     12 ; GENERIC-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
     13 ; GENERIC-NEXT:    # sched: [7:1.00]
     14 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
     15 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     16 ;
     17 ; BDVER-LABEL: test_x86_tbm_bextri_u32:
     18 ; BDVER:       # %bb.0:
     19 ; BDVER-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
     20 ; BDVER-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
     21 ; BDVER-NEXT:    addl %ecx, %eax
     22 ; BDVER-NEXT:    retq
     23   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
     24   %r0 = lshr i32 %a0, 4 ; drop the low 4 bits of the register input
     25   %m0 = lshr i32 %a1, 4 ; same shift on the loaded input
     26   %r1 = and i32 %r0, 4095 ; 4095 = 0xFFF keeps 12 bits, i.e. source bits 4..15; presumably the 0xC04 immediate encodes start 4 / length 12 (TODO confirm against the AMD manual)
     27   %m1 = and i32 %m0, 4095 ; same 12-bit mask on the loaded input
     28   %res = add i32 %r1, %m1 ; sum both extracted fields so each feeds the return value
     29   ret i32 %res
     30 }
     31 
     32 define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant of (x >> 4) & 4095 on %a0 and on the i64 loaded from %p1, summed; the check lines expect the 32-bit bextrl form followed by a 64-bit addq
     33 ; GENERIC-LABEL: test_x86_tbm_bextri_u64:
     34 ; GENERIC:       # %bb.0:
     35 ; GENERIC-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
     36 ; GENERIC-NEXT:    # sched: [2:1.00]
     37 ; GENERIC-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
     38 ; GENERIC-NEXT:    # sched: [7:1.00]
     39 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
     40 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     41 ;
     42 ; BDVER-LABEL: test_x86_tbm_bextri_u64:
     43 ; BDVER:       # %bb.0:
     44 ; BDVER-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
     45 ; BDVER-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
     46 ; BDVER-NEXT:    addq %rcx, %rax
     47 ; BDVER-NEXT:    retq
     48   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
     49   %r0 = lshr i64 %a0, 4 ; drop the low 4 bits of the register input
     50   %m0 = lshr i64 %a1, 4 ; same shift on the loaded input
     51   %r1 = and i64 %r0, 4095 ; 4095 = 0xFFF keeps source bits 4..15; NOTE(review): presumably this is why the narrower bextrl is acceptable here - confirm
     52   %m1 = and i64 %m0, 4095 ; same 12-bit mask on the loaded input
     53   %res = add i64 %r1, %m1 ; sum both extracted fields so each feeds the return value
     54   ret i64 %res
     55 }
     56 
     57 define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes (x + 1) & x for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one blcfilll per input
     58 ; GENERIC-LABEL: test_x86_tbm_blcfill_u32:
     59 ; GENERIC:       # %bb.0:
     60 ; GENERIC-NEXT:    blcfilll %edi, %ecx # sched: [1:0.33]
     61 ; GENERIC-NEXT:    blcfilll (%rsi), %eax # sched: [6:0.50]
     62 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
     63 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     64 ;
     65 ; BDVER-LABEL: test_x86_tbm_blcfill_u32:
     66 ; BDVER:       # %bb.0:
     67 ; BDVER-NEXT:    blcfilll %edi, %ecx
     68 ; BDVER-NEXT:    blcfilll (%rsi), %eax
     69 ; BDVER-NEXT:    addl %ecx, %eax
     70 ; BDVER-NEXT:    retq
     71   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
     72   %r0 = add i32 %a0, 1 ; x + 1 for the register input
     73   %m0 = add i32 %a1, 1 ; x + 1 for the loaded input
     74   %r1 = and i32 %r0, %a0 ; (x + 1) & x
     75   %m1 = and i32 %m0, %a1 ; same pattern on the loaded input
     76   %res = add i32 %r1, %m1 ; sum both results so each feeds the return value
     77   ret i32 %res
     78 }
     79 
     80 define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes (x + 1) & x for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one blcfillq per input
     81 ; GENERIC-LABEL: test_x86_tbm_blcfill_u64:
     82 ; GENERIC:       # %bb.0:
     83 ; GENERIC-NEXT:    blcfillq %rdi, %rcx # sched: [1:0.33]
     84 ; GENERIC-NEXT:    blcfillq (%rsi), %rax # sched: [6:0.50]
     85 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
     86 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     87 ;
     88 ; BDVER-LABEL: test_x86_tbm_blcfill_u64:
     89 ; BDVER:       # %bb.0:
     90 ; BDVER-NEXT:    blcfillq %rdi, %rcx
     91 ; BDVER-NEXT:    blcfillq (%rsi), %rax
     92 ; BDVER-NEXT:    addq %rcx, %rax
     93 ; BDVER-NEXT:    retq
     94   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
     95   %r0 = add i64 %a0, 1 ; x + 1 for the register input
     96   %m0 = add i64 %a1, 1 ; x + 1 for the loaded input
     97   %r1 = and i64 %r0, %a0 ; (x + 1) & x
     98   %m1 = and i64 %m0, %a1 ; same pattern on the loaded input
     99   %res = add i64 %r1, %m1 ; sum both results so each feeds the return value
    100   ret i64 %res
    101 }
    102 
    103 define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes ~(x + 1) | x for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one blcil per input
    104 ; GENERIC-LABEL: test_x86_tbm_blci_u32:
    105 ; GENERIC:       # %bb.0:
    106 ; GENERIC-NEXT:    blcil %edi, %ecx # sched: [1:0.33]
    107 ; GENERIC-NEXT:    blcil (%rsi), %eax # sched: [6:0.50]
    108 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    109 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    110 ;
    111 ; BDVER-LABEL: test_x86_tbm_blci_u32:
    112 ; BDVER:       # %bb.0:
    113 ; BDVER-NEXT:    blcil %edi, %ecx
    114 ; BDVER-NEXT:    blcil (%rsi), %eax
    115 ; BDVER-NEXT:    addl %ecx, %eax
    116 ; BDVER-NEXT:    retq
    117   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    118   %r0 = add i32 1, %a0 ; x + 1 for the register input (constant written as the first operand)
    119   %m0 = add i32 1, %a1 ; x + 1 for the loaded input
    120   %r1 = xor i32 %r0, -1 ; bitwise NOT of (x + 1)
    121   %m1 = xor i32 %m0, -1 ; bitwise NOT of (x + 1) for the loaded input
    122   %r2 = or i32 %r1, %a0 ; ~(x + 1) | x
    123   %m2 = or i32 %m1, %a1 ; same pattern on the loaded input
    124   %res = add i32 %r2, %m2 ; sum both results so each feeds the return value
    125   ret i32 %res
    126 }
    127 
    128 define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes ~(x + 1) | x for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one blciq per input
    129 ; GENERIC-LABEL: test_x86_tbm_blci_u64:
    130 ; GENERIC:       # %bb.0:
    131 ; GENERIC-NEXT:    blciq %rdi, %rcx # sched: [1:0.33]
    132 ; GENERIC-NEXT:    blciq (%rsi), %rax # sched: [6:0.50]
    133 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    134 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    135 ;
    136 ; BDVER-LABEL: test_x86_tbm_blci_u64:
    137 ; BDVER:       # %bb.0:
    138 ; BDVER-NEXT:    blciq %rdi, %rcx
    139 ; BDVER-NEXT:    blciq (%rsi), %rax
    140 ; BDVER-NEXT:    addq %rcx, %rax
    141 ; BDVER-NEXT:    retq
    142   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    143   %r0 = add i64 1, %a0 ; x + 1 for the register input (constant written as the first operand)
    144   %m0 = add i64 1, %a1 ; x + 1 for the loaded input
    145   %r1 = xor i64 %r0, -1 ; bitwise NOT of (x + 1)
    146   %m1 = xor i64 %m0, -1 ; bitwise NOT of (x + 1) for the loaded input
    147   %r2 = or i64 %r1, %a0 ; ~(x + 1) | x
    148   %m2 = or i64 %m1, %a1 ; same pattern on the loaded input
    149   %res = add i64 %r2, %m2 ; sum both results so each feeds the return value
    150   ret i64 %res
    151 }
    152 
    153 define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes (x + 1) & ~x for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one blcicl per input
    154 ; GENERIC-LABEL: test_x86_tbm_blcic_u32:
    155 ; GENERIC:       # %bb.0:
    156 ; GENERIC-NEXT:    blcicl %edi, %ecx # sched: [1:0.33]
    157 ; GENERIC-NEXT:    blcicl (%rsi), %eax # sched: [6:0.50]
    158 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    159 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    160 ;
    161 ; BDVER-LABEL: test_x86_tbm_blcic_u32:
    162 ; BDVER:       # %bb.0:
    163 ; BDVER-NEXT:    blcicl %edi, %ecx
    164 ; BDVER-NEXT:    blcicl (%rsi), %eax
    165 ; BDVER-NEXT:    addl %ecx, %eax
    166 ; BDVER-NEXT:    retq
    167   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    168   %r0 = xor i32 %a0, -1 ; bitwise NOT of the register input
    169   %m0 = xor i32 %a1, -1 ; bitwise NOT of the loaded input
    170   %r1 = add i32 %a0, 1 ; x + 1 for the register input
    171   %m1 = add i32 %a1, 1 ; x + 1 for the loaded input
    172   %r2 = and i32 %r1, %r0 ; (x + 1) & ~x
    173   %m2 = and i32 %m1, %m0 ; same pattern on the loaded input
    174   %res = add i32 %r2, %m2 ; sum both results so each feeds the return value
    175   ret i32 %res
    176 }
    177 
    178 define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes (x + 1) & ~x for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one blcicq per input
    179 ; GENERIC-LABEL: test_x86_tbm_blcic_u64:
    180 ; GENERIC:       # %bb.0:
    181 ; GENERIC-NEXT:    blcicq %rdi, %rcx # sched: [1:0.33]
    182 ; GENERIC-NEXT:    blcicq (%rsi), %rax # sched: [6:0.50]
    183 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    184 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    185 ;
    186 ; BDVER-LABEL: test_x86_tbm_blcic_u64:
    187 ; BDVER:       # %bb.0:
    188 ; BDVER-NEXT:    blcicq %rdi, %rcx
    189 ; BDVER-NEXT:    blcicq (%rsi), %rax
    190 ; BDVER-NEXT:    addq %rcx, %rax
    191 ; BDVER-NEXT:    retq
    192   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    193   %r0 = xor i64 %a0, -1 ; bitwise NOT of the register input
    194   %m0 = xor i64 %a1, -1 ; bitwise NOT of the loaded input
    195   %r1 = add i64 %a0, 1 ; x + 1 for the register input
    196   %m1 = add i64 %a1, 1 ; x + 1 for the loaded input
    197   %r2 = and i64 %r1, %r0 ; (x + 1) & ~x
    198   %m2 = and i64 %m1, %m0 ; same pattern on the loaded input
    199   %res = add i64 %r2, %m2 ; sum both results so each feeds the return value
    200   ret i64 %res
    201 }
    202 
    203 define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes (x + 1) ^ x for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one blcmskl per input
    204 ; GENERIC-LABEL: test_x86_tbm_blcmsk_u32:
    205 ; GENERIC:       # %bb.0:
    206 ; GENERIC-NEXT:    blcmskl %edi, %ecx # sched: [1:0.33]
    207 ; GENERIC-NEXT:    blcmskl (%rsi), %eax # sched: [6:0.50]
    208 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    209 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    210 ;
    211 ; BDVER-LABEL: test_x86_tbm_blcmsk_u32:
    212 ; BDVER:       # %bb.0:
    213 ; BDVER-NEXT:    blcmskl %edi, %ecx
    214 ; BDVER-NEXT:    blcmskl (%rsi), %eax
    215 ; BDVER-NEXT:    addl %ecx, %eax
    216 ; BDVER-NEXT:    retq
    217   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    218   %r0 = add i32 %a0, 1 ; x + 1 for the register input
    219   %m0 = add i32 %a1, 1 ; x + 1 for the loaded input
    220   %r1 = xor i32 %r0, %a0 ; (x + 1) ^ x
    221   %m1 = xor i32 %m0, %a1 ; same pattern on the loaded input
    222   %res = add i32 %r1, %m1 ; sum both results so each feeds the return value
    223   ret i32 %res
    224 }
    225 
    226 define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes (x + 1) ^ x for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one blcmskq per input
    227 ; GENERIC-LABEL: test_x86_tbm_blcmsk_u64:
    228 ; GENERIC:       # %bb.0:
    229 ; GENERIC-NEXT:    blcmskq %rdi, %rcx # sched: [1:0.33]
    230 ; GENERIC-NEXT:    blcmskq (%rsi), %rax # sched: [6:0.50]
    231 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    232 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    233 ;
    234 ; BDVER-LABEL: test_x86_tbm_blcmsk_u64:
    235 ; BDVER:       # %bb.0:
    236 ; BDVER-NEXT:    blcmskq %rdi, %rcx
    237 ; BDVER-NEXT:    blcmskq (%rsi), %rax
    238 ; BDVER-NEXT:    addq %rcx, %rax
    239 ; BDVER-NEXT:    retq
    240   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    241   %r0 = add i64 %a0, 1 ; x + 1 for the register input
    242   %m0 = add i64 %a1, 1 ; x + 1 for the loaded input
    243   %r1 = xor i64 %r0, %a0 ; (x + 1) ^ x
    244   %m1 = xor i64 %m0, %a1 ; same pattern on the loaded input
    245   %res = add i64 %r1, %m1 ; sum both results so each feeds the return value
    246   ret i64 %res
    247 }
    248 
    249 define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes (x + 1) | x for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one blcsl per input
    250 ; GENERIC-LABEL: test_x86_tbm_blcs_u32:
    251 ; GENERIC:       # %bb.0:
    252 ; GENERIC-NEXT:    blcsl %edi, %ecx # sched: [1:0.33]
    253 ; GENERIC-NEXT:    blcsl (%rsi), %eax # sched: [6:0.50]
    254 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    255 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    256 ;
    257 ; BDVER-LABEL: test_x86_tbm_blcs_u32:
    258 ; BDVER:       # %bb.0:
    259 ; BDVER-NEXT:    blcsl %edi, %ecx
    260 ; BDVER-NEXT:    blcsl (%rsi), %eax
    261 ; BDVER-NEXT:    addl %ecx, %eax
    262 ; BDVER-NEXT:    retq
    263   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    264   %r0 = add i32 %a0, 1 ; x + 1 for the register input
    265   %m0 = add i32 %a1, 1 ; x + 1 for the loaded input
    266   %r1 = or i32 %r0, %a0 ; (x + 1) | x
    267   %m1 = or i32 %m0, %a1 ; same pattern on the loaded input
    268   %res = add i32 %r1, %m1 ; sum both results so each feeds the return value
    269   ret i32 %res
    270 }
    271 
    272 define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes (x + 1) | x for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one blcsq per input
    273 ; GENERIC-LABEL: test_x86_tbm_blcs_u64:
    274 ; GENERIC:       # %bb.0:
    275 ; GENERIC-NEXT:    blcsq %rdi, %rcx # sched: [1:0.33]
    276 ; GENERIC-NEXT:    blcsq (%rsi), %rax # sched: [6:0.50]
    277 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    278 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    279 ;
    280 ; BDVER-LABEL: test_x86_tbm_blcs_u64:
    281 ; BDVER:       # %bb.0:
    282 ; BDVER-NEXT:    blcsq %rdi, %rcx
    283 ; BDVER-NEXT:    blcsq (%rsi), %rax
    284 ; BDVER-NEXT:    addq %rcx, %rax
    285 ; BDVER-NEXT:    retq
    286   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    287   %r0 = add i64 %a0, 1 ; x + 1 for the register input
    288   %m0 = add i64 %a1, 1 ; x + 1 for the loaded input
    289   %r1 = or i64 %r0, %a0 ; (x + 1) | x
    290   %m1 = or i64 %m0, %a1 ; same pattern on the loaded input
    291   %res = add i64 %r1, %m1 ; sum both results so each feeds the return value
    292   ret i64 %res
    293 }
    294 
    295 define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes (x - 1) | x for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one blsfilll per input
    296 ; GENERIC-LABEL: test_x86_tbm_blsfill_u32:
    297 ; GENERIC:       # %bb.0:
    298 ; GENERIC-NEXT:    blsfilll %edi, %ecx # sched: [1:0.33]
    299 ; GENERIC-NEXT:    blsfilll (%rsi), %eax # sched: [6:0.50]
    300 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    301 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    302 ;
    303 ; BDVER-LABEL: test_x86_tbm_blsfill_u32:
    304 ; BDVER:       # %bb.0:
    305 ; BDVER-NEXT:    blsfilll %edi, %ecx
    306 ; BDVER-NEXT:    blsfilll (%rsi), %eax
    307 ; BDVER-NEXT:    addl %ecx, %eax
    308 ; BDVER-NEXT:    retq
    309   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    310   %r0 = add i32 %a0, -1 ; x - 1 for the register input
    311   %m0 = add i32 %a1, -1 ; x - 1 for the loaded input
    312   %r1 = or i32 %r0, %a0 ; (x - 1) | x
    313   %m1 = or i32 %m0, %a1 ; same pattern on the loaded input
    314   %res = add i32 %r1, %m1 ; sum both results so each feeds the return value
    315   ret i32 %res
    316 }
    317 
    318 define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes (x - 1) | x for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one blsfillq per input
    319 ; GENERIC-LABEL: test_x86_tbm_blsfill_u64:
    320 ; GENERIC:       # %bb.0:
    321 ; GENERIC-NEXT:    blsfillq %rdi, %rcx # sched: [1:0.33]
    322 ; GENERIC-NEXT:    blsfillq (%rsi), %rax # sched: [6:0.50]
    323 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    324 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    325 ;
    326 ; BDVER-LABEL: test_x86_tbm_blsfill_u64:
    327 ; BDVER:       # %bb.0:
    328 ; BDVER-NEXT:    blsfillq %rdi, %rcx
    329 ; BDVER-NEXT:    blsfillq (%rsi), %rax
    330 ; BDVER-NEXT:    addq %rcx, %rax
    331 ; BDVER-NEXT:    retq
    332   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    333   %r0 = add i64 %a0, -1 ; x - 1 for the register input
    334   %m0 = add i64 %a1, -1 ; x - 1 for the loaded input
    335   %r1 = or i64 %r0, %a0 ; (x - 1) | x
    336   %m1 = or i64 %m0, %a1 ; same pattern on the loaded input
    337   %res = add i64 %r1, %m1 ; sum both results so each feeds the return value
    338   ret i64 %res
    339 }
    340 
    341 define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes ~x | (x - 1) for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one blsicl per input
    342 ; GENERIC-LABEL: test_x86_tbm_blsic_u32:
    343 ; GENERIC:       # %bb.0:
    344 ; GENERIC-NEXT:    blsicl %edi, %ecx # sched: [1:0.33]
    345 ; GENERIC-NEXT:    blsicl (%rsi), %eax # sched: [6:0.50]
    346 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    347 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    348 ;
    349 ; BDVER-LABEL: test_x86_tbm_blsic_u32:
    350 ; BDVER:       # %bb.0:
    351 ; BDVER-NEXT:    blsicl %edi, %ecx
    352 ; BDVER-NEXT:    blsicl (%rsi), %eax
    353 ; BDVER-NEXT:    addl %ecx, %eax
    354 ; BDVER-NEXT:    retq
    355   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    356   %r0 = xor i32 %a0, -1 ; bitwise NOT of the register input
    357   %m0 = xor i32 %a1, -1 ; bitwise NOT of the loaded input
    358   %r1 = add i32 %a0, -1 ; x - 1 for the register input
    359   %m1 = add i32 %a1, -1 ; x - 1 for the loaded input
    360   %r2 = or i32 %r0, %r1 ; ~x | (x - 1)
    361   %m2 = or i32 %m0, %m1 ; same pattern on the loaded input
    362   %res = add i32 %r2, %m2 ; sum both results so each feeds the return value
    363   ret i32 %res
    364 }
    365 
    366 define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes ~x | (x - 1) for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one blsicq per input
    367 ; GENERIC-LABEL: test_x86_tbm_blsic_u64:
    368 ; GENERIC:       # %bb.0:
    369 ; GENERIC-NEXT:    blsicq %rdi, %rcx # sched: [1:0.33]
    370 ; GENERIC-NEXT:    blsicq (%rsi), %rax # sched: [6:0.50]
    371 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    372 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    373 ;
    374 ; BDVER-LABEL: test_x86_tbm_blsic_u64:
    375 ; BDVER:       # %bb.0:
    376 ; BDVER-NEXT:    blsicq %rdi, %rcx
    377 ; BDVER-NEXT:    blsicq (%rsi), %rax
    378 ; BDVER-NEXT:    addq %rcx, %rax
    379 ; BDVER-NEXT:    retq
    380   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    381   %r0 = xor i64 %a0, -1 ; bitwise NOT of the register input
    382   %m0 = xor i64 %a1, -1 ; bitwise NOT of the loaded input
    383   %r1 = add i64 %a0, -1 ; x - 1 for the register input
    384   %m1 = add i64 %a1, -1 ; x - 1 for the loaded input
    385   %r2 = or i64 %r0, %r1 ; ~x | (x - 1)
    386   %m2 = or i64 %m0, %m1 ; same pattern on the loaded input
    387   %res = add i64 %r2, %m2 ; sum both results so each feeds the return value
    388   ret i64 %res
    389 }
    390 
    391 define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes ~x | (x + 1) for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one t1mskcl per input
    392 ; GENERIC-LABEL: test_x86_tbm_t1mskc_u32:
    393 ; GENERIC:       # %bb.0:
    394 ; GENERIC-NEXT:    t1mskcl %edi, %ecx # sched: [1:0.33]
    395 ; GENERIC-NEXT:    t1mskcl (%rsi), %eax # sched: [6:0.50]
    396 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    397 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    398 ;
    399 ; BDVER-LABEL: test_x86_tbm_t1mskc_u32:
    400 ; BDVER:       # %bb.0:
    401 ; BDVER-NEXT:    t1mskcl %edi, %ecx
    402 ; BDVER-NEXT:    t1mskcl (%rsi), %eax
    403 ; BDVER-NEXT:    addl %ecx, %eax
    404 ; BDVER-NEXT:    retq
    405   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    406   %r0 = xor i32 %a0, -1 ; bitwise NOT of the register input
    407   %m0 = xor i32 %a1, -1 ; bitwise NOT of the loaded input
    408   %r1 = add i32 %a0, 1 ; x + 1 for the register input
    409   %m1 = add i32 %a1, 1 ; x + 1 for the loaded input
    410   %r2 = or i32 %r0, %r1 ; ~x | (x + 1)
    411   %m2 = or i32 %m0, %m1 ; same pattern on the loaded input
    412   %res = add i32 %r2, %m2 ; sum both results so each feeds the return value
    413   ret i32 %res
    414 }
    415 
    416 define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes ~x | (x + 1) for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one t1mskcq per input
    417 ; GENERIC-LABEL: test_x86_tbm_t1mskc_u64:
    418 ; GENERIC:       # %bb.0:
    419 ; GENERIC-NEXT:    t1mskcq %rdi, %rcx # sched: [1:0.33]
    420 ; GENERIC-NEXT:    t1mskcq (%rsi), %rax # sched: [6:0.50]
    421 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    422 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    423 ;
    424 ; BDVER-LABEL: test_x86_tbm_t1mskc_u64:
    425 ; BDVER:       # %bb.0:
    426 ; BDVER-NEXT:    t1mskcq %rdi, %rcx
    427 ; BDVER-NEXT:    t1mskcq (%rsi), %rax
    428 ; BDVER-NEXT:    addq %rcx, %rax
    429 ; BDVER-NEXT:    retq
    430   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    431   %r0 = xor i64 %a0, -1 ; bitwise NOT of the register input
    432   %m0 = xor i64 %a1, -1 ; bitwise NOT of the loaded input
    433   %r1 = add i64 %a0, 1 ; x + 1 for the register input
    434   %m1 = add i64 %a1, 1 ; x + 1 for the loaded input
    435   %r2 = or i64 %r0, %r1 ; ~x | (x + 1)
    436   %m2 = or i64 %m0, %m1 ; same pattern on the loaded input
    437   %res = add i64 %r2, %m2 ; sum both results so each feeds the return value
    438   ret i64 %res
    439 }
    440 
    441 define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind { ; computes ~x & (x - 1) for %a0 and for the i32 loaded from %p1, then returns the sum; the check lines expect one tzmskl per input
    442 ; GENERIC-LABEL: test_x86_tbm_tzmsk_u32:
    443 ; GENERIC:       # %bb.0:
    444 ; GENERIC-NEXT:    tzmskl %edi, %ecx # sched: [1:0.33]
    445 ; GENERIC-NEXT:    tzmskl (%rsi), %eax # sched: [6:0.50]
    446 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    447 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    448 ;
    449 ; BDVER-LABEL: test_x86_tbm_tzmsk_u32:
    450 ; BDVER:       # %bb.0:
    451 ; BDVER-NEXT:    tzmskl %edi, %ecx
    452 ; BDVER-NEXT:    tzmskl (%rsi), %eax
    453 ; BDVER-NEXT:    addl %ecx, %eax
    454 ; BDVER-NEXT:    retq
    455   %a1 = load i32, i32* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    456   %r0 = xor i32 %a0, -1 ; bitwise NOT of the register input
    457   %m0 = xor i32 %a1, -1 ; bitwise NOT of the loaded input
    458   %r1 = add i32 %a0, -1 ; x - 1 for the register input
    459   %m1 = add i32 %a1, -1 ; x - 1 for the loaded input
    460   %r2 = and i32 %r0, %r1 ; ~x & (x - 1)
    461   %m2 = and i32 %m0, %m1 ; same pattern on the loaded input
    462   %res = add i32 %r2, %m2 ; sum both results so each feeds the return value
    463   ret i32 %res
    464 }
    465 
    466 define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind { ; i64 variant: computes ~x & (x - 1) for %a0 and for the i64 loaded from %p1, then returns the sum; the check lines expect one tzmskq per input
    467 ; GENERIC-LABEL: test_x86_tbm_tzmsk_u64:
    468 ; GENERIC:       # %bb.0:
    469 ; GENERIC-NEXT:    tzmskq %rdi, %rcx # sched: [1:0.33]
    470 ; GENERIC-NEXT:    tzmskq (%rsi), %rax # sched: [6:0.50]
    471 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    472 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    473 ;
    474 ; BDVER-LABEL: test_x86_tbm_tzmsk_u64:
    475 ; BDVER:       # %bb.0:
    476 ; BDVER-NEXT:    tzmskq %rdi, %rcx
    477 ; BDVER-NEXT:    tzmskq (%rsi), %rax
    478 ; BDVER-NEXT:    addq %rcx, %rax
    479 ; BDVER-NEXT:    retq
    480   %a1 = load i64, i64* %p1 ; memory-side input; the check lines show it folded into the (%rsi) operand
    481   %r0 = xor i64 %a0, -1 ; bitwise NOT of the register input
    482   %m0 = xor i64 %a1, -1 ; bitwise NOT of the loaded input
    483   %r1 = add i64 %a0, -1 ; x - 1 for the register input
    484   %m1 = add i64 %a1, -1 ; x - 1 for the loaded input
    485   %r2 = and i64 %r0, %r1 ; ~x & (x - 1)
    486   %m2 = and i64 %m0, %m1 ; same pattern on the loaded input
    487   %res = add i64 %r2, %m2 ; sum both results so each feeds the return value
    488   ret i64 %res
    489 }
    490