Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2  | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1  | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
      9 
     10 define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
        ; Computes (~%a0 & %a1) + (~%a0 & load(%a2)) on i32. The autogenerated
        ; assertions below expect both masked ANDs to be emitted as `andnl` (one
        ; register-source form, one memory-source form) followed by `addl`, each
        ; carrying the `# sched` annotation printed for that check prefix.
     11 ; GENERIC-LABEL: test_andn_i32:
     12 ; GENERIC:       # %bb.0:
     13 ; GENERIC-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.33]
     14 ; GENERIC-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
     15 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
     16 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     17 ;
     18 ; HASWELL-LABEL: test_andn_i32:
     19 ; HASWELL:       # %bb.0:
     20 ; HASWELL-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
     21 ; HASWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
     22 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
     23 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     24 ;
     25 ; BROADWELL-LABEL: test_andn_i32:
     26 ; BROADWELL:       # %bb.0:
     27 ; BROADWELL-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
     28 ; BROADWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
     29 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
     30 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     31 ;
     32 ; SKYLAKE-LABEL: test_andn_i32:
     33 ; SKYLAKE:       # %bb.0:
     34 ; SKYLAKE-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
     35 ; SKYLAKE-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
     36 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
     37 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     38 ;
     39 ; BTVER2-LABEL: test_andn_i32:
     40 ; BTVER2:       # %bb.0:
     41 ; BTVER2-NEXT:    andnl (%rdx), %edi, %eax # sched: [4:1.00]
     42 ; BTVER2-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
     43 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
     44 ; BTVER2-NEXT:    retq # sched: [4:1.00]
     45 ;
     46 ; ZNVER1-LABEL: test_andn_i32:
     47 ; ZNVER1:       # %bb.0:
     48 ; ZNVER1-NEXT:    andnl (%rdx), %edi, %eax # sched: [5:0.50]
     49 ; ZNVER1-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.25]
     50 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
     51 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
     52   %1 = load i32, i32 *%a2
     53   %2 = xor i32 %a0, -1
     54   %3 = and i32 %2, %a1
     55   %4 = and i32 %2, %1
     56   %5 = add i32 %3, %4
     57   ret i32 %5
     58 }
     59 
     60 define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
        ; 64-bit variant: computes (~%a0 & %a1) + (~%a0 & load(%a2)) on i64. The
        ; autogenerated assertions below expect both masked ANDs to be emitted as
        ; `andnq` (one register-source form, one memory-source form) followed by
        ; `addq`, each carrying the `# sched` annotation for that check prefix.
     61 ; GENERIC-LABEL: test_andn_i64:
     62 ; GENERIC:       # %bb.0:
     63 ; GENERIC-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.33]
     64 ; GENERIC-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
     65 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
     66 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     67 ;
     68 ; HASWELL-LABEL: test_andn_i64:
     69 ; HASWELL:       # %bb.0:
     70 ; HASWELL-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
     71 ; HASWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
     72 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
     73 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     74 ;
     75 ; BROADWELL-LABEL: test_andn_i64:
     76 ; BROADWELL:       # %bb.0:
     77 ; BROADWELL-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
     78 ; BROADWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
     79 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
     80 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     81 ;
     82 ; SKYLAKE-LABEL: test_andn_i64:
     83 ; SKYLAKE:       # %bb.0:
     84 ; SKYLAKE-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
     85 ; SKYLAKE-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
     86 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
     87 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     88 ;
     89 ; BTVER2-LABEL: test_andn_i64:
     90 ; BTVER2:       # %bb.0:
     91 ; BTVER2-NEXT:    andnq (%rdx), %rdi, %rax # sched: [4:1.00]
     92 ; BTVER2-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
     93 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
     94 ; BTVER2-NEXT:    retq # sched: [4:1.00]
     95 ;
     96 ; ZNVER1-LABEL: test_andn_i64:
     97 ; ZNVER1:       # %bb.0:
     98 ; ZNVER1-NEXT:    andnq (%rdx), %rdi, %rax # sched: [5:0.50]
     99 ; ZNVER1-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.25]
    100 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    101 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    102   %1 = load i64, i64 *%a2
    103   %2 = xor i64 %a0, -1
    104   %3 = and i64 %2, %a1
    105   %4 = and i64 %2, %1
    106   %5 = add i64 %3, %4
    107   ret i64 %5
    108 }
    109 
    110 define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
        ; Calls @llvm.x86.bmi.bextr.32 twice with %a0 as the second argument, once
        ; on the value loaded from %a2 and once on %a1, and returns the sum of the
        ; two results. The autogenerated assertions below expect a memory-source
        ; `bextrl`, a register-source `bextrl`, then `addl`, each carrying the
        ; `# sched` annotation for that check prefix.
    111 ; GENERIC-LABEL: test_bextr_i32:
    112 ; GENERIC:       # %bb.0:
    113 ; GENERIC-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:1.00]
    114 ; GENERIC-NEXT:    bextrl %edi, %esi, %eax # sched: [2:1.00]
    115 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    116 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    117 ;
    118 ; HASWELL-LABEL: test_bextr_i32:
    119 ; HASWELL:       # %bb.0:
    120 ; HASWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
    121 ; HASWELL-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
    122 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    123 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    124 ;
    125 ; BROADWELL-LABEL: test_bextr_i32:
    126 ; BROADWELL:       # %bb.0:
    127 ; BROADWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
    128 ; BROADWELL-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
    129 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    130 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    131 ;
    132 ; SKYLAKE-LABEL: test_bextr_i32:
    133 ; SKYLAKE:       # %bb.0:
    134 ; SKYLAKE-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
    135 ; SKYLAKE-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
    136 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    137 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    138 ;
    139 ; BTVER2-LABEL: test_bextr_i32:
    140 ; BTVER2:       # %bb.0:
    141 ; BTVER2-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [4:1.00]
    142 ; BTVER2-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.50]
    143 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
    144 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    145 ;
    146 ; ZNVER1-LABEL: test_bextr_i32:
    147 ; ZNVER1:       # %bb.0:
    148 ; ZNVER1-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [5:0.50]
    149 ; ZNVER1-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.25]
    150 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    151 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    152   %1 = load i32, i32 *%a2
    153   %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %1, i32 %a0)
    154   %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a1, i32 %a0)
    155   %4 = add i32 %2, %3
    156   ret i32 %4
    157 }
        ; Declaration of the intrinsic called by test_bextr_i32.
    158 declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
    159 
    160 define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
        ; 64-bit variant: calls @llvm.x86.bmi.bextr.64 twice with %a0 as the second
        ; argument, once on the value loaded from %a2 and once on %a1, and returns
        ; the sum of the two results. The autogenerated assertions below expect a
        ; memory-source `bextrq`, a register-source `bextrq`, then `addq`, each
        ; carrying the `# sched` annotation for that check prefix.
    161 ; GENERIC-LABEL: test_bextr_i64:
    162 ; GENERIC:       # %bb.0:
    163 ; GENERIC-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:1.00]
    164 ; GENERIC-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:1.00]
    165 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    166 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    167 ;
    168 ; HASWELL-LABEL: test_bextr_i64:
    169 ; HASWELL:       # %bb.0:
    170 ; HASWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
    171 ; HASWELL-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
    172 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    173 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    174 ;
    175 ; BROADWELL-LABEL: test_bextr_i64:
    176 ; BROADWELL:       # %bb.0:
    177 ; BROADWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
    178 ; BROADWELL-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
    179 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    180 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    181 ;
    182 ; SKYLAKE-LABEL: test_bextr_i64:
    183 ; SKYLAKE:       # %bb.0:
    184 ; SKYLAKE-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
    185 ; SKYLAKE-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
    186 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    187 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    188 ;
    189 ; BTVER2-LABEL: test_bextr_i64:
    190 ; BTVER2:       # %bb.0:
    191 ; BTVER2-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [4:1.00]
    192 ; BTVER2-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.50]
    193 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
    194 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    195 ;
    196 ; ZNVER1-LABEL: test_bextr_i64:
    197 ; ZNVER1:       # %bb.0:
    198 ; ZNVER1-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [5:0.50]
    199 ; ZNVER1-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.25]
    200 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    201 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    202   %1 = load i64, i64 *%a2
    203   %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %1, i64 %a0)
    204   %3 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a1, i64 %a0)
    205   %4 = add i64 %2, %3
    206   ret i64 %4
    207 }
        ; Declaration of the intrinsic called by test_bextr_i64.
    208 declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
    209 
    210 define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
        ; Computes (x & (0 - x)) for x = load(%a1) and for x = %a0, then returns the
        ; sum of the two. The autogenerated assertions below expect each pattern to
        ; be selected as `blsil` (one memory-source form, one register-source form)
        ; followed by `addl`, each carrying the `# sched` annotation for that check
        ; prefix.
    211 ; GENERIC-LABEL: test_blsi_i32:
    212 ; GENERIC:       # %bb.0:
    213 ; GENERIC-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
    214 ; GENERIC-NEXT:    blsil %edi, %eax # sched: [1:0.33]
    215 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    216 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    217 ;
    218 ; HASWELL-LABEL: test_blsi_i32:
    219 ; HASWELL:       # %bb.0:
    220 ; HASWELL-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
    221 ; HASWELL-NEXT:    blsil %edi, %eax # sched: [1:0.50]
    222 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    223 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    224 ;
    225 ; BROADWELL-LABEL: test_blsi_i32:
    226 ; BROADWELL:       # %bb.0:
    227 ; BROADWELL-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
    228 ; BROADWELL-NEXT:    blsil %edi, %eax # sched: [1:0.50]
    229 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    230 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    231 ;
    232 ; SKYLAKE-LABEL: test_blsi_i32:
    233 ; SKYLAKE:       # %bb.0:
    234 ; SKYLAKE-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
    235 ; SKYLAKE-NEXT:    blsil %edi, %eax # sched: [1:0.50]
    236 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    237 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    238 ;
    239 ; BTVER2-LABEL: test_blsi_i32:
    240 ; BTVER2:       # %bb.0:
    241 ; BTVER2-NEXT:    blsil (%rsi), %ecx # sched: [4:1.00]
    242 ; BTVER2-NEXT:    blsil %edi, %eax # sched: [1:0.50]
    243 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
    244 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    245 ;
    246 ; ZNVER1-LABEL: test_blsi_i32:
    247 ; ZNVER1:       # %bb.0:
    248 ; ZNVER1-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
    249 ; ZNVER1-NEXT:    blsil %edi, %eax # sched: [2:0.25]
    250 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    251 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    252   %1 = load i32, i32 *%a1
    253   %2 = sub i32 0, %1
    254   %3 = sub i32 0, %a0
    255   %4 = and i32 %1, %2
    256   %5 = and i32 %a0, %3
    257   %6 = add i32 %4, %5
    258   ret i32 %6
    259 }
    260 
    261 define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
        ; 64-bit variant: computes (x & (0 - x)) for x = load(%a1) and for x = %a0,
        ; then returns the sum of the two. The autogenerated assertions below expect
        ; each pattern to be selected as `blsiq` (one memory-source form, one
        ; register-source form) followed by `addq`, each carrying the `# sched`
        ; annotation for that check prefix.
    262 ; GENERIC-LABEL: test_blsi_i64:
    263 ; GENERIC:       # %bb.0:
    264 ; GENERIC-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
    265 ; GENERIC-NEXT:    blsiq %rdi, %rax # sched: [1:0.33]
    266 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    267 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    268 ;
    269 ; HASWELL-LABEL: test_blsi_i64:
    270 ; HASWELL:       # %bb.0:
    271 ; HASWELL-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
    272 ; HASWELL-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
    273 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    274 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    275 ;
    276 ; BROADWELL-LABEL: test_blsi_i64:
    277 ; BROADWELL:       # %bb.0:
    278 ; BROADWELL-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
    279 ; BROADWELL-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
    280 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    281 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    282 ;
    283 ; SKYLAKE-LABEL: test_blsi_i64:
    284 ; SKYLAKE:       # %bb.0:
    285 ; SKYLAKE-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
    286 ; SKYLAKE-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
    287 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    288 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    289 ;
    290 ; BTVER2-LABEL: test_blsi_i64:
    291 ; BTVER2:       # %bb.0:
    292 ; BTVER2-NEXT:    blsiq (%rsi), %rcx # sched: [4:1.00]
    293 ; BTVER2-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
    294 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
    295 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    296 ;
    297 ; ZNVER1-LABEL: test_blsi_i64:
    298 ; ZNVER1:       # %bb.0:
    299 ; ZNVER1-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
    300 ; ZNVER1-NEXT:    blsiq %rdi, %rax # sched: [2:0.25]
    301 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    302 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    303   %1 = load i64, i64 *%a1
    304   %2 = sub i64 0, %1
    305   %3 = sub i64 0, %a0
    306   %4 = and i64 %1, %2
    307   %5 = and i64 %a0, %3
    308   %6 = add i64 %4, %5
    309   ret i64 %6
    310 }
    311 
    312 define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
        ; Computes (x ^ (x - 1)) for x = load(%a1) and for x = %a0, then returns the
        ; sum of the two. The autogenerated assertions below expect each pattern to
        ; be selected as `blsmskl` (one memory-source form, one register-source
        ; form) followed by `addl`, each carrying the `# sched` annotation for that
        ; check prefix.
    313 ; GENERIC-LABEL: test_blsmsk_i32:
    314 ; GENERIC:       # %bb.0:
    315 ; GENERIC-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
    316 ; GENERIC-NEXT:    blsmskl %edi, %eax # sched: [1:0.33]
    317 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    318 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    319 ;
    320 ; HASWELL-LABEL: test_blsmsk_i32:
    321 ; HASWELL:       # %bb.0:
    322 ; HASWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
    323 ; HASWELL-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
    324 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    325 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    326 ;
    327 ; BROADWELL-LABEL: test_blsmsk_i32:
    328 ; BROADWELL:       # %bb.0:
    329 ; BROADWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
    330 ; BROADWELL-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
    331 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    332 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    333 ;
    334 ; SKYLAKE-LABEL: test_blsmsk_i32:
    335 ; SKYLAKE:       # %bb.0:
    336 ; SKYLAKE-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
    337 ; SKYLAKE-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
    338 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    339 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    340 ;
    341 ; BTVER2-LABEL: test_blsmsk_i32:
    342 ; BTVER2:       # %bb.0:
    343 ; BTVER2-NEXT:    blsmskl (%rsi), %ecx # sched: [4:1.00]
    344 ; BTVER2-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
    345 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
    346 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    347 ;
    348 ; ZNVER1-LABEL: test_blsmsk_i32:
    349 ; ZNVER1:       # %bb.0:
    350 ; ZNVER1-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
    351 ; ZNVER1-NEXT:    blsmskl %edi, %eax # sched: [2:0.25]
    352 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    353 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    354   %1 = load i32, i32 *%a1
    355   %2 = sub i32 %1, 1
    356   %3 = sub i32 %a0, 1
    357   %4 = xor i32 %1, %2
    358   %5 = xor i32 %a0, %3
    359   %6 = add i32 %4, %5
    360   ret i32 %6
    361 }
    362 
    363 define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
        ; 64-bit variant: computes (x ^ (x - 1)) for x = load(%a1) and for x = %a0,
        ; then returns the sum of the two. The autogenerated assertions below expect
        ; each pattern to be selected as `blsmskq` (one memory-source form, one
        ; register-source form) followed by `addq`, each carrying the `# sched`
        ; annotation for that check prefix.
    364 ; GENERIC-LABEL: test_blsmsk_i64:
    365 ; GENERIC:       # %bb.0:
    366 ; GENERIC-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
    367 ; GENERIC-NEXT:    blsmskq %rdi, %rax # sched: [1:0.33]
    368 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    369 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    370 ;
    371 ; HASWELL-LABEL: test_blsmsk_i64:
    372 ; HASWELL:       # %bb.0:
    373 ; HASWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
    374 ; HASWELL-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
    375 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    376 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    377 ;
    378 ; BROADWELL-LABEL: test_blsmsk_i64:
    379 ; BROADWELL:       # %bb.0:
    380 ; BROADWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
    381 ; BROADWELL-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
    382 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    383 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    384 ;
    385 ; SKYLAKE-LABEL: test_blsmsk_i64:
    386 ; SKYLAKE:       # %bb.0:
    387 ; SKYLAKE-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
    388 ; SKYLAKE-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
    389 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    390 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    391 ;
    392 ; BTVER2-LABEL: test_blsmsk_i64:
    393 ; BTVER2:       # %bb.0:
    394 ; BTVER2-NEXT:    blsmskq (%rsi), %rcx # sched: [4:1.00]
    395 ; BTVER2-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
    396 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
    397 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    398 ;
    399 ; ZNVER1-LABEL: test_blsmsk_i64:
    400 ; ZNVER1:       # %bb.0:
    401 ; ZNVER1-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
    402 ; ZNVER1-NEXT:    blsmskq %rdi, %rax # sched: [2:0.25]
    403 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    404 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    405   %1 = load i64, i64 *%a1
    406   %2 = sub i64 %1, 1
    407   %3 = sub i64 %a0, 1
    408   %4 = xor i64 %1, %2
    409   %5 = xor i64 %a0, %3
    410   %6 = add i64 %4, %5
    411   ret i64 %6
    412 }
    413 
    414 define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
        ; Computes (x & (x - 1)) for x = load(%a1) and for x = %a0, then returns the
        ; sum of the two. The autogenerated assertions below expect each pattern to
        ; be selected as `blsrl` (one memory-source form, one register-source form)
        ; followed by `addl`, each carrying the `# sched` annotation for that check
        ; prefix.
    415 ; GENERIC-LABEL: test_blsr_i32:
    416 ; GENERIC:       # %bb.0:
    417 ; GENERIC-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
    418 ; GENERIC-NEXT:    blsrl %edi, %eax # sched: [1:0.33]
    419 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
    420 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    421 ;
    422 ; HASWELL-LABEL: test_blsr_i32:
    423 ; HASWELL:       # %bb.0:
    424 ; HASWELL-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
    425 ; HASWELL-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
    426 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    427 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    428 ;
    429 ; BROADWELL-LABEL: test_blsr_i32:
    430 ; BROADWELL:       # %bb.0:
    431 ; BROADWELL-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
    432 ; BROADWELL-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
    433 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    434 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    435 ;
    436 ; SKYLAKE-LABEL: test_blsr_i32:
    437 ; SKYLAKE:       # %bb.0:
    438 ; SKYLAKE-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
    439 ; SKYLAKE-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
    440 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    441 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    442 ;
    443 ; BTVER2-LABEL: test_blsr_i32:
    444 ; BTVER2:       # %bb.0:
    445 ; BTVER2-NEXT:    blsrl (%rsi), %ecx # sched: [4:1.00]
    446 ; BTVER2-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
    447 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
    448 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    449 ;
    450 ; ZNVER1-LABEL: test_blsr_i32:
    451 ; ZNVER1:       # %bb.0:
    452 ; ZNVER1-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
    453 ; ZNVER1-NEXT:    blsrl %edi, %eax # sched: [2:0.25]
    454 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
    455 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    456   %1 = load i32, i32 *%a1
    457   %2 = sub i32 %1, 1
    458   %3 = sub i32 %a0, 1
    459   %4 = and i32 %1, %2
    460   %5 = and i32 %a0, %3
    461   %6 = add i32 %4, %5
    462   ret i32 %6
    463 }
    464 
    465 define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
        ; 64-bit variant: computes (x & (x - 1)) for x = load(%a1) and for x = %a0,
        ; then returns the sum of the two. The autogenerated assertions below expect
        ; each pattern to be selected as `blsrq` (one memory-source form, one
        ; register-source form) followed by `addq`, each carrying the `# sched`
        ; annotation for that check prefix.
    466 ; GENERIC-LABEL: test_blsr_i64:
    467 ; GENERIC:       # %bb.0:
    468 ; GENERIC-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
    469 ; GENERIC-NEXT:    blsrq %rdi, %rax # sched: [1:0.33]
    470 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
    471 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    472 ;
    473 ; HASWELL-LABEL: test_blsr_i64:
    474 ; HASWELL:       # %bb.0:
    475 ; HASWELL-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
    476 ; HASWELL-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
    477 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    478 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    479 ;
    480 ; BROADWELL-LABEL: test_blsr_i64:
    481 ; BROADWELL:       # %bb.0:
    482 ; BROADWELL-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
    483 ; BROADWELL-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
    484 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    485 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    486 ;
    487 ; SKYLAKE-LABEL: test_blsr_i64:
    488 ; SKYLAKE:       # %bb.0:
    489 ; SKYLAKE-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
    490 ; SKYLAKE-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
    491 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    492 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    493 ;
    494 ; BTVER2-LABEL: test_blsr_i64:
    495 ; BTVER2:       # %bb.0:
    496 ; BTVER2-NEXT:    blsrq (%rsi), %rcx # sched: [4:1.00]
    497 ; BTVER2-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
    498 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
    499 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    500 ;
    501 ; ZNVER1-LABEL: test_blsr_i64:
    502 ; ZNVER1:       # %bb.0:
    503 ; ZNVER1-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
    504 ; ZNVER1-NEXT:    blsrq %rdi, %rax # sched: [2:0.25]
    505 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
    506 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    507   %1 = load i64, i64 *%a1
    508   %2 = sub i64 %1, 1
    509   %3 = sub i64 %a0, 1
    510   %4 = and i64 %1, %2
    511   %5 = and i64 %a0, %3
    512   %6 = add i64 %4, %5
    513   ret i64 %6
    514 }
    515 
    516 define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
        ; Calls @llvm.cttz.i16 (second argument `i1 false`) on the value loaded from
        ; %a1 and on %a0, then returns the bitwise OR of the two results. The
        ; autogenerated assertions below expect a memory-source `tzcntw`, a
        ; register-source `tzcntw`, a 32-bit `orl`, and a `# kill` marker line
        ; before `retq`; the instruction lines carry the `# sched` annotation for
        ; that check prefix.
    517 ; GENERIC-LABEL: test_cttz_i16:
    518 ; GENERIC:       # %bb.0:
    519 ; GENERIC-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
    520 ; GENERIC-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
    521 ; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
    522 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
    523 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    524 ;
    525 ; HASWELL-LABEL: test_cttz_i16:
    526 ; HASWELL:       # %bb.0:
    527 ; HASWELL-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
    528 ; HASWELL-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
    529 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    530 ; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
    531 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    532 ;
    533 ; BROADWELL-LABEL: test_cttz_i16:
    534 ; BROADWELL:       # %bb.0:
    535 ; BROADWELL-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
    536 ; BROADWELL-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
    537 ; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    538 ; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
    539 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    540 ;
    541 ; SKYLAKE-LABEL: test_cttz_i16:
    542 ; SKYLAKE:       # %bb.0:
    543 ; SKYLAKE-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
    544 ; SKYLAKE-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
    545 ; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    546 ; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
    547 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    548 ;
    549 ; BTVER2-LABEL: test_cttz_i16:
    550 ; BTVER2:       # %bb.0:
    551 ; BTVER2-NEXT:    tzcntw (%rsi), %cx # sched: [5:1.00]
    552 ; BTVER2-NEXT:    tzcntw %di, %ax # sched: [2:1.00]
    553 ; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
    554 ; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
    555 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    556 ;
    557 ; ZNVER1-LABEL: test_cttz_i16:
    558 ; ZNVER1:       # %bb.0:
    559 ; ZNVER1-NEXT:    tzcntw (%rsi), %cx # sched: [6:0.50]
    560 ; ZNVER1-NEXT:    tzcntw %di, %ax # sched: [2:0.25]
    561 ; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    562 ; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
    563 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    564   %1 = load i16, i16 *%a1
    565   %2 = tail call i16 @llvm.cttz.i16( i16 %1, i1 false )
    566   %3 = tail call i16 @llvm.cttz.i16( i16 %a0, i1 false )
    567   %4 = or i16 %2, %3
    568   ret i16 %4
    569 }
        ; Declaration of the intrinsic called by test_cttz_i16.
    570 declare i16 @llvm.cttz.i16(i16, i1)
    571 
    572 define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
        ; Calls @llvm.cttz.i32 (second argument `i1 false`) on the value loaded from
        ; %a1 and on %a0, then returns the bitwise OR of the two results. The
        ; autogenerated assertions below expect a memory-source `tzcntl`, a
        ; register-source `tzcntl`, then `orl`, each carrying the `# sched`
        ; annotation for that check prefix.
    573 ; GENERIC-LABEL: test_cttz_i32:
    574 ; GENERIC:       # %bb.0:
    575 ; GENERIC-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
    576 ; GENERIC-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
    577 ; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
    578 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    579 ;
    580 ; HASWELL-LABEL: test_cttz_i32:
    581 ; HASWELL:       # %bb.0:
    582 ; HASWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
    583 ; HASWELL-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
    584 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    585 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    586 ;
    587 ; BROADWELL-LABEL: test_cttz_i32:
    588 ; BROADWELL:       # %bb.0:
    589 ; BROADWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
    590 ; BROADWELL-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
    591 ; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    592 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    593 ;
    594 ; SKYLAKE-LABEL: test_cttz_i32:
    595 ; SKYLAKE:       # %bb.0:
    596 ; SKYLAKE-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
    597 ; SKYLAKE-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
    598 ; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    599 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    600 ;
    601 ; BTVER2-LABEL: test_cttz_i32:
    602 ; BTVER2:       # %bb.0:
    603 ; BTVER2-NEXT:    tzcntl (%rsi), %ecx # sched: [5:1.00]
    604 ; BTVER2-NEXT:    tzcntl %edi, %eax # sched: [2:1.00]
    605 ; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
    606 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    607 ;
    608 ; ZNVER1-LABEL: test_cttz_i32:
    609 ; ZNVER1:       # %bb.0:
    610 ; ZNVER1-NEXT:    tzcntl (%rsi), %ecx # sched: [6:0.50]
    611 ; ZNVER1-NEXT:    tzcntl %edi, %eax # sched: [2:0.25]
    612 ; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
    613 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    614   %1 = load i32, i32 *%a1
    615   %2 = tail call i32 @llvm.cttz.i32( i32 %1, i1 false )
    616   %3 = tail call i32 @llvm.cttz.i32( i32 %a0, i1 false )
    617   %4 = or i32 %2, %3
    618   ret i32 %4
    619 }
        ; Declaration of the intrinsic called by test_cttz_i32.
    620 declare i32 @llvm.cttz.i32(i32, i1)
    621 
    622 define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
        ; Calls @llvm.cttz.i64 (second argument `i1 false`) on the value loaded from
        ; %a1 and on %a0, then returns the bitwise OR of the two results. The
        ; autogenerated assertions below expect a memory-source `tzcntq`, a
        ; register-source `tzcntq`, then `orq`, each carrying the `# sched`
        ; annotation for that check prefix.
    623 ; GENERIC-LABEL: test_cttz_i64:
    624 ; GENERIC:       # %bb.0:
    625 ; GENERIC-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
    626 ; GENERIC-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
    627 ; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
    628 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    629 ;
    630 ; HASWELL-LABEL: test_cttz_i64:
    631 ; HASWELL:       # %bb.0:
    632 ; HASWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
    633 ; HASWELL-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
    634 ; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
    635 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    636 ;
    637 ; BROADWELL-LABEL: test_cttz_i64:
    638 ; BROADWELL:       # %bb.0:
    639 ; BROADWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
    640 ; BROADWELL-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
    641 ; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
    642 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    643 ;
    644 ; SKYLAKE-LABEL: test_cttz_i64:
    645 ; SKYLAKE:       # %bb.0:
    646 ; SKYLAKE-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
    647 ; SKYLAKE-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
    648 ; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
    649 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    650 ;
    651 ; BTVER2-LABEL: test_cttz_i64:
    652 ; BTVER2:       # %bb.0:
    653 ; BTVER2-NEXT:    tzcntq (%rsi), %rcx # sched: [5:1.00]
    654 ; BTVER2-NEXT:    tzcntq %rdi, %rax # sched: [2:1.00]
    655 ; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
    656 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    657 ;
    658 ; ZNVER1-LABEL: test_cttz_i64:
    659 ; ZNVER1:       # %bb.0:
    660 ; ZNVER1-NEXT:    tzcntq (%rsi), %rcx # sched: [6:0.50]
    661 ; ZNVER1-NEXT:    tzcntq %rdi, %rax # sched: [2:0.25]
    662 ; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
    663 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    664   %1 = load i64, i64 *%a1
    665   %2 = tail call i64 @llvm.cttz.i64( i64 %1, i1 false )
    666   %3 = tail call i64 @llvm.cttz.i64( i64 %a0, i1 false )
    667   %4 = or i64 %2, %3
    668   ret i64 %4
    669 }
        ; Declaration of the intrinsic called by test_cttz_i64.
    670 declare i64 @llvm.cttz.i64(i64, i1)
    671