Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sha | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=CANNONLAKE
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
      6 
      7 ;
      8 ; SHA1
      9 ;
     10 
     11 define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Schedule-annotation test for the llvm.x86.sha1msg1 intrinsic.
        ; For each CPU prefix the check lines below pin three instructions: a
        ; register-operand sha1msg1, a memory-operand sha1msg1, and the return.
        ; The bracketed pair after "# sched" is presumably latency and reciprocal
        ; throughput as printed by -print-schedule -- TODO confirm.
     12 ; GENERIC-LABEL: test_sha1msg1:
     13 ; GENERIC:       # %bb.0:
     14 ; GENERIC-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
     15 ; GENERIC-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [11:1.00]
     16 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     17 ;
     18 ; GOLDMONT-LABEL: test_sha1msg1:
     19 ; GOLDMONT:       # %bb.0:
     20 ; GOLDMONT-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [4:1.00]
     21 ; GOLDMONT-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [7:1.00]
     22 ; GOLDMONT-NEXT:    retq # sched: [4:1.00]
     23 ;
     24 ; CANNONLAKE-LABEL: test_sha1msg1:
     25 ; CANNONLAKE:       # %bb.0:
     26 ; CANNONLAKE-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [4:0.50]
     27 ; CANNONLAKE-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
     28 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
     29 ;
     30 ; ZNVER1-LABEL: test_sha1msg1:
     31 ; ZNVER1:       # %bb.0:
     32 ; ZNVER1-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [2:1.00]
     33 ; ZNVER1-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [9:1.00]
     34 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is loaded from %a2 and used only as the second operand of the second
        ; call; the expected assembly shows it as the (%rdi) memory operand.
     35   %1 = load <4 x i32>, <4 x i32>* %a2
        ; First call: both operands come straight from the vector arguments.
     36   %2 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a0, <4 x i32> %a1)
        ; Second call: chains on %2 and consumes the loaded value.
     37   %3 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %2, <4 x i32> %1)
     38   ret <4 x i32> %3
     39 }
        ; Declaration of the intrinsic called by test_sha1msg1.
     40 declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>)
     41 
     42 define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Schedule-annotation test for the llvm.x86.sha1msg2 intrinsic.
        ; For each CPU prefix the check lines below pin three instructions: a
        ; register-operand sha1msg2, a memory-operand sha1msg2, and the return.
        ; The bracketed pair after "# sched" is presumably latency and reciprocal
        ; throughput as printed by -print-schedule -- TODO confirm.
     43 ; GENERIC-LABEL: test_sha1msg2:
     44 ; GENERIC:       # %bb.0:
     45 ; GENERIC-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
     46 ; GENERIC-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [11:1.00]
     47 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     48 ;
     49 ; GOLDMONT-LABEL: test_sha1msg2:
     50 ; GOLDMONT:       # %bb.0:
     51 ; GOLDMONT-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [4:1.00]
     52 ; GOLDMONT-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [7:1.00]
     53 ; GOLDMONT-NEXT:    retq # sched: [4:1.00]
     54 ;
     55 ; CANNONLAKE-LABEL: test_sha1msg2:
     56 ; CANNONLAKE:       # %bb.0:
     57 ; CANNONLAKE-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [4:0.50]
     58 ; CANNONLAKE-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
     59 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
     60 ;
     61 ; ZNVER1-LABEL: test_sha1msg2:
     62 ; ZNVER1:       # %bb.0:
     63 ; ZNVER1-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [1:0.50]
     64 ; ZNVER1-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [8:0.50]
     65 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is loaded from %a2 and used only as the second operand of the second
        ; call; the expected assembly shows it as the (%rdi) memory operand.
     66   %1 = load <4 x i32>, <4 x i32>* %a2
        ; First call: both operands come straight from the vector arguments.
     67   %2 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a0, <4 x i32> %a1)
        ; Second call: chains on %2 and consumes the loaded value.
     68   %3 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %2, <4 x i32> %1)
     69   ret <4 x i32> %3
     70 }
        ; Declaration of the intrinsic called by test_sha1msg2.
     71 declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>)
     72 
     73 define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Schedule-annotation test for the llvm.x86.sha1nexte intrinsic.
        ; For each CPU prefix the check lines below pin three instructions: a
        ; register-operand sha1nexte, a memory-operand sha1nexte, and the return.
        ; The bracketed pair after "# sched" is presumably latency and reciprocal
        ; throughput as printed by -print-schedule -- TODO confirm.
     74 ; GENERIC-LABEL: test_sha1nexte:
     75 ; GENERIC:       # %bb.0:
     76 ; GENERIC-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
     77 ; GENERIC-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [11:1.00]
     78 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     79 ;
     80 ; GOLDMONT-LABEL: test_sha1nexte:
     81 ; GOLDMONT:       # %bb.0:
     82 ; GOLDMONT-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [4:1.00]
     83 ; GOLDMONT-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [7:1.00]
     84 ; GOLDMONT-NEXT:    retq # sched: [4:1.00]
     85 ;
     86 ; CANNONLAKE-LABEL: test_sha1nexte:
     87 ; CANNONLAKE:       # %bb.0:
     88 ; CANNONLAKE-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [4:0.50]
     89 ; CANNONLAKE-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
     90 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
     91 ;
     92 ; ZNVER1-LABEL: test_sha1nexte:
     93 ; ZNVER1:       # %bb.0:
     94 ; ZNVER1-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [1:1.00]
     95 ; ZNVER1-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [8:1.00]
     96 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is loaded from %a2 and used only as the second operand of the second
        ; call; the expected assembly shows it as the (%rdi) memory operand.
     97   %1 = load <4 x i32>, <4 x i32>* %a2
        ; First call: both operands come straight from the vector arguments.
     98   %2 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a0, <4 x i32> %a1)
        ; Second call: chains on %2 and consumes the loaded value.
     99   %3 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %2, <4 x i32> %1)
    100   ret <4 x i32> %3
    101 }
        ; Declaration of the intrinsic called by test_sha1nexte.
    102 declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>)
    103 
    104 define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Schedule-annotation test for the llvm.x86.sha1rnds4 intrinsic, which
        ; takes an extra i8 argument alongside the two vectors.
        ; For each CPU prefix the check lines below pin three instructions: a
        ; register-operand sha1rnds4, a memory-operand sha1rnds4, and the return.
        ; The i8 value 3 passed in the IR appears as the $3 immediate in both.
        ; The bracketed pair after "# sched" is presumably latency and reciprocal
        ; throughput as printed by -print-schedule -- TODO confirm.
    105 ; GENERIC-LABEL: test_sha1rnds4:
    106 ; GENERIC:       # %bb.0:
    107 ; GENERIC-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
    108 ; GENERIC-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00]
    109 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    110 ;
    111 ; GOLDMONT-LABEL: test_sha1rnds4:
    112 ; GOLDMONT:       # %bb.0:
    113 ; GOLDMONT-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:1.00]
    114 ; GOLDMONT-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [7:1.00]
    115 ; GOLDMONT-NEXT:    retq # sched: [4:1.00]
    116 ;
    117 ; CANNONLAKE-LABEL: test_sha1rnds4:
    118 ; CANNONLAKE:       # %bb.0:
    119 ; CANNONLAKE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50]
    120 ; CANNONLAKE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
    121 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
    122 ;
    123 ; ZNVER1-LABEL: test_sha1rnds4:
    124 ; ZNVER1:       # %bb.0:
    125 ; ZNVER1-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [6:1.00]
    126 ; ZNVER1-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [13:1.00]
    127 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is loaded from %a2 and used only as the second operand of the second
        ; call; the expected assembly shows it as the (%rdi) memory operand.
    128   %1 = load <4 x i32>, <4 x i32>* %a2
        ; First call: vector operands come straight from the arguments.
    129   %2 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, i8 3)
        ; Second call: chains on %2, consumes the loaded value, same i8 constant.
    130   %3 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %2, <4 x i32> %1, i8 3)
    131   ret <4 x i32> %3
    132 }
        ; Declaration of the intrinsic called by test_sha1rnds4.
    133 declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8)
    134 
    135 ;
    136 ; SHA256
    137 ;
    138 
    139 define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Schedule-annotation test for the llvm.x86.sha256msg1 intrinsic.
        ; For each CPU prefix the check lines below pin three instructions: a
        ; register-operand sha256msg1, a memory-operand sha256msg1, and the return.
        ; The bracketed pair after "# sched" is presumably latency and reciprocal
        ; throughput as printed by -print-schedule -- TODO confirm.
    140 ; GENERIC-LABEL: test_sha256msg1:
    141 ; GENERIC:       # %bb.0:
    142 ; GENERIC-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
    143 ; GENERIC-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [11:1.00]
    144 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    145 ;
    146 ; GOLDMONT-LABEL: test_sha256msg1:
    147 ; GOLDMONT:       # %bb.0:
    148 ; GOLDMONT-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [4:1.00]
    149 ; GOLDMONT-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [7:1.00]
    150 ; GOLDMONT-NEXT:    retq # sched: [4:1.00]
    151 ;
    152 ; CANNONLAKE-LABEL: test_sha256msg1:
    153 ; CANNONLAKE:       # %bb.0:
    154 ; CANNONLAKE-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [4:0.50]
    155 ; CANNONLAKE-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
    156 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
    157 ;
    158 ; ZNVER1-LABEL: test_sha256msg1:
    159 ; ZNVER1:       # %bb.0:
    160 ; ZNVER1-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [2:1.00]
    161 ; ZNVER1-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [9:1.00]
    162 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is loaded from %a2 and used only as the second operand of the second
        ; call; the expected assembly shows it as the (%rdi) memory operand.
    163   %1 = load <4 x i32>, <4 x i32>* %a2
        ; First call: both operands come straight from the vector arguments.
    164   %2 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a0, <4 x i32> %a1)
        ; Second call: chains on %2 and consumes the loaded value.
    165   %3 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %2, <4 x i32> %1)
    166   ret <4 x i32> %3
    167 }
        ; Declaration of the intrinsic called by test_sha256msg1.
    168 declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>)
    169 
    170 define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Schedule-annotation test for the llvm.x86.sha256msg2 intrinsic.
        ; For each CPU prefix the check lines below pin three instructions: a
        ; register-operand sha256msg2, a memory-operand sha256msg2, and the return.
        ; The bracketed pair after "# sched" is presumably latency and reciprocal
        ; throughput as printed by -print-schedule -- TODO confirm.
        ; NOTE(review): the ZNVER1 expectations below read [100:0.25] for both the
        ; register and the memory form, whereas every other CPU/instruction pair in
        ; this file shows a higher first number for the memory form. This looks
        ; like a placeholder in the scheduling model rather than a measured
        ; figure -- confirm before relying on it.
    171 ; GENERIC-LABEL: test_sha256msg2:
    172 ; GENERIC:       # %bb.0:
    173 ; GENERIC-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
    174 ; GENERIC-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [11:1.00]
    175 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    176 ;
    177 ; GOLDMONT-LABEL: test_sha256msg2:
    178 ; GOLDMONT:       # %bb.0:
    179 ; GOLDMONT-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [4:1.00]
    180 ; GOLDMONT-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [7:1.00]
    181 ; GOLDMONT-NEXT:    retq # sched: [4:1.00]
    182 ;
    183 ; CANNONLAKE-LABEL: test_sha256msg2:
    184 ; CANNONLAKE:       # %bb.0:
    185 ; CANNONLAKE-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [4:0.50]
    186 ; CANNONLAKE-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
    187 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
    188 ;
    189 ; ZNVER1-LABEL: test_sha256msg2:
    190 ; ZNVER1:       # %bb.0:
    191 ; ZNVER1-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [100:0.25]
    192 ; ZNVER1-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [100:0.25]
    193 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is loaded from %a2 and used only as the second operand of the second
        ; call; the expected assembly shows it as the (%rdi) memory operand.
    194   %1 = load <4 x i32>, <4 x i32>* %a2
        ; First call: both operands come straight from the vector arguments.
    195   %2 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a0, <4 x i32> %a1)
        ; Second call: chains on %2 and consumes the loaded value.
    196   %3 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %2, <4 x i32> %1)
    197   ret <4 x i32> %3
    198 }
        ; Declaration of the intrinsic called by test_sha256msg2.
    199 declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>)
    200 
    201 define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) {
        ; Schedule-annotation test for the llvm.x86.sha256rnds2 intrinsic, which
        ; takes three vector operands; here the pointer argument is %a3.
        ; Unlike the two-operand tests in this file, the expected assembly has
        ; register copies around the SHA instructions: %xmm0 is copied to %xmm3,
        ; %xmm2 is copied to %xmm0, both sha256rnds2 forms name %xmm0 as their
        ; first operand and accumulate into %xmm3, and %xmm3 is copied back to
        ; %xmm0 before the return. Presumably this is because the instruction
        ; reads its third input implicitly from xmm0 -- TODO confirm against the
        ; ISA reference.
        ; The bracketed pair after "# sched" is presumably latency and reciprocal
        ; throughput as printed by -print-schedule -- TODO confirm.
    202 ; GENERIC-LABEL: test_sha256rnds2:
    203 ; GENERIC:       # %bb.0:
    204 ; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
    205 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    206 ; GENERIC-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
    207 ; GENERIC-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
    208 ; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
    209 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    210 ;
    211 ; GOLDMONT-LABEL: test_sha256rnds2:
    212 ; GOLDMONT:       # %bb.0:
    213 ; GOLDMONT-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
    214 ; GOLDMONT-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
    215 ; GOLDMONT-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
    216 ; GOLDMONT-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [7:1.00]
    217 ; GOLDMONT-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
    218 ; GOLDMONT-NEXT:    retq # sched: [4:1.00]
    219 ;
    220 ; CANNONLAKE-LABEL: test_sha256rnds2:
    221 ; CANNONLAKE:       # %bb.0:
    222 ; CANNONLAKE-NEXT:    vmovaps %xmm0, %xmm3 # sched: [1:0.33]
    223 ; CANNONLAKE-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.33]
    224 ; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50]
    225 ; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
    226 ; CANNONLAKE-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:0.33]
    227 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
    228 ;
    229 ; ZNVER1-LABEL: test_sha256rnds2:
    230 ; ZNVER1:       # %bb.0:
    231 ; ZNVER1-NEXT:    vmovaps %xmm0, %xmm3 # sched: [1:0.25]
    232 ; ZNVER1-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.25]
    233 ; ZNVER1-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
    234 ; ZNVER1-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
    235 ; ZNVER1-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:0.25]
    236 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is loaded from %a3 and used only as the second operand of the second
        ; call; the expected assembly shows it as the (%rdi) memory operand.
    237   %1 = load <4 x i32>, <4 x i32>* %a3
        ; First call: all three operands come straight from the vector arguments.
    238   %2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
        ; Second call: chains on %2, consumes the loaded value, and reuses %a2 as
        ; the third operand.
    239   %3 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %2, <4 x i32> %1, <4 x i32> %a2)
    240   ret <4 x i32> %3
    241 }
        ; Declaration of the intrinsic called by test_sha256rnds2.
    242 declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>)
    243