; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sha | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=CANNONLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

; Purpose: verify the "# sched: [latency:throughput]" annotations that llc
; emits under -print-schedule for every x86 SHA instruction, on four CPU
; configurations (x86-64, goldmont, cannonlake, znver1).
;
; Only the x86-64 invocation passes -mattr=+sha explicitly; the other three
; rely on whatever features their -mcpu selects (presumably SHA is enabled
; by default there - confirm against the target definitions).
;
; Every test follows the same shape: the intrinsic is called once with both
; inputs coming from arguments, then again with the previous result and a
; value loaded from memory. The expected output therefore contains one
; register-operand form and one memory-operand form of the same instruction.
;
; The per-function expectations are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

;
; SHA1
;

; sha1msg1: register form followed by memory form.
define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1msg1:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1msg1:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1msg1:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [9:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>)

; sha1msg2: register form followed by memory form.
define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1msg2:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1msg2:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1msg2:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>)

; sha1nexte: register form followed by memory form.
define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1nexte:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1nexte:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1nexte:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1nexte:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [1:1.00]
; ZNVER1-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [8:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>)

; sha1rnds4: register form followed by memory form. Both calls pass the
; constant i8 3 as the last argument, which shows up as the $3 immediate in
; the expected assembly.
define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1rnds4:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1rnds4:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1rnds4:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1rnds4:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [6:1.00]
; ZNVER1-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [13:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, i8 3)
  %3 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %2, <4 x i32> %1, i8 3)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8)

;
; SHA256
;

; sha256msg1: register form followed by memory form.
define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha256msg1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256msg1:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha256msg1:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256msg1:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [9:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>)

; sha256msg2: register form followed by memory form.
; NOTE(review): the znver1 expectations below report latency 100 for both
; forms, and the memory form is no slower than the register form, unlike
; every other instruction in this file. Presumably the znver1 model treats
; this instruction with a placeholder cost - confirm against the scheduling
; model before relying on these numbers.
define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha256msg2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256msg2:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha256msg2:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256msg2:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>)

; sha256rnds2: register form followed by memory form. This test takes one
; more vector argument than the others (%a2), and the pointer is %a3. The
; expected output moves %a2 into %xmm0 before the two instructions and keeps
; the running result in %xmm3, so extra movaps/vmovaps copies (with their
; own schedule annotations) appear around the instructions under test.
define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) {
; GENERIC-LABEL: test_sha256rnds2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
; GENERIC-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256rnds2:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
; GOLDMONT-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
; GOLDMONT-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [7:1.00]
; GOLDMONT-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha256rnds2:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    vmovaps %xmm0, %xmm3 # sched: [1:0.33]
; CANNONLAKE-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
; CANNONLAKE-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256rnds2:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vmovaps %xmm0, %xmm3 # sched: [1:0.25]
; ZNVER1-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
; ZNVER1-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
; ZNVER1-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a3
  %2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  %3 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %2, <4 x i32> %1, <4 x i32> %a2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>)