#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro (at] fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and an issued instruction [in this case
# a load from memory is critical] has to complete before execution
# flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support: it's basically a "teaser," as
# it can and should be improved in several ways. Most notably, support
# for CBC is not utilized, nor are multiple blocks ever processed.
# Also, the software key schedule could be postponed until hardware
# support detection... Performance improvement over assembler is
# reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
# proper support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# the critical path is not long enough. Yet it spends ~24 cycles per
# byte processed with 128-bit key.
#
# Unlike the previous version, hardware support detection takes place
# only at the moment of key schedule setup, which is denoted in
# key->rounds. This is done because deferred key setup can't be made
# MT-safe, at least not for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives an incredible performance
# improvement; it was measured to be ~6.6x. It's less than the
# previously mentioned 8x, because the software implementation was
# optimized.

# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over "generic" counter mode routine relying
# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
# to the fact that exact throughput value depends on current stack
# frame alignment within 4KB page. In the worst case you get ~75% of
# the maximum, but *on average* it is as much as ~98%. Meaning that
# the worst case is unlikely, like hitting a ravine on a plateau.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's a "z-CPU". The latter implies that the
# code remains z/Architecture specific. On z990 it was measured to
# perform 2x better than code generated by gcc 4.3.

# December 2010.
#
# Add support for z196 "cipher message with counter" instruction.
# Note, however, that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...

# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
# instructions, which deliver ~70% improvement at 8KB block size over
# vanilla km-based code, and ~37% at 512-byte block size.

$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";

$softonly=0;	# allow hardware support

$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

$stdframe=16*$SIZE_T+4*8;

sub _data_word()
{ my $i;
    while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
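# Note on the table layout emitted by _data_word: every 32-bit entry
# of AES_Te/AES_Td is stored twice, back to back, so each logical
# entry occupies 8 bytes and indices are pre-scaled by 8 (see the
# `0xff<<3` mask in the round loops). A 4-byte load at byte offset
# 0..3 within an 8-byte entry then yields the table word rotated by
# that many bytes, emulating the four rotated tables Te0..Te3
# (Td0..Td3) in 2KB instead of 8KB. A rough C model of one lookup,
# for illustration only:
#
#	/* u64 TeDouble[256]: each element is (Te0[i]<<32)|Te0[i]   */
#	/* offset 0 -> Te0[i], 3 -> Te1[i], 2 -> Te2[i], 1 -> Te3[i] */
#	u32 lookup(int off, u8 i)
#	{ return *(u32 *)((u8 *)TeDouble + 8*i + off); }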
$code=<<___;
.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
&_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
#		 const AES_KEY *key) {
.globl	AES_encrypt
.type	AES_encrypt,\@function
AES_encrypt:
___
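# The dispatch below relies on a convention set up by the key-setup
# routines: the 32-bit word at 240($key), nominally AES_KEY->rounds,
# holds either a software round count (10/12/14) or, when hardware
# support was detected, the KM function code for the key length
# (18/19/20 for aes-128/192/256). A value below 16 therefore means
# "software only". The hand-assembled `.long 0xb92e0042` is km %r4,%r2,
# spelled out numerically, presumably so that the code also builds
# with assemblers that lack the mnemonic.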
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Lesoft:
___
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_encrypt,.-AES_encrypt

.type	_s390x_AES_encrypt,\@function
.align	16
_s390x_AES_encrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Lenc_loop
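	# Each iteration below computes one AES round in the classic
	# T-table form; in rough C, with rk advanced 16 bytes per round:
	#
	#	t0 = Te0[s0>>24] ^ Te1[(s1>>16)&0xff] ^ Te2[(s2>>8)&0xff]
	#	   ^ Te3[s3&0xff] ^ rk[0];	/* and so on for t1..t3 */
	#
	# Registers hold the state bytes pre-shifted left by 3 so they
	# index the 8-byte doubled table entries directly; load offsets
	# 0/3/2/1 select the Te0/Te1/Te2/Te3 rotations (see the layout
	# note next to _data_word).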
.align	16
.Lenc_loop:
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	ngr	$t1,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	ngr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Te0[s0>>24]
	l	$t1,1($t1,$tbl)	# Te3[s0>>0]
	l	$t2,2($t2,$tbl)	# Te2[s0>>8]
	l	$t3,3($t3,$tbl)	# Te1[s0>>16]

	x	$s0,3($i1,$tbl)	# Te1[s1>>16]
	l	$s1,0($s1,$tbl)	# Te0[s1>>24]
	x	$t2,1($i2,$tbl)	# Te3[s1>>0]
	x	$t3,2($i3,$tbl)	# Te2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	nr	$s2,$mask
	ngr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	sllg	$t1,$s3,`0+3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	ngr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Te2[s2>>8]
	x	$s1,3($i2,$tbl)	# Te1[s2>>16]
	l	$s2,0($s2,$tbl)	# Te0[s2>>24]
	x	$t3,1($i3,$tbl)	# Te3[s2>>0]

	srlg	$i3,$s3,`16-3`	# i2
	xr	$s2,$t2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,1($t1,$tbl)	# Te3[s3>>0]
	x	$s1,2($ra,$tbl)	# Te2[s3>>8]
	x	$s2,3($i3,$tbl)	# Te1[s3>>16]
	l	$s3,0($s3,$tbl)	# Te0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Lenc_loop
.align	16
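	# Last round: only SubBytes is needed, and no separate table is
	# consulted. The code below exploits the doubled-entry layout:
	# byte 2 of every 8-byte Te entry is the plain S-box value
	# (Te0[x] has byte pattern 2s,s,s,3s), hence all the
	# "llgc ...,2(index,tbl)" loads. The 256-byte table emitted
	# after the Te words is used by the key schedule, not here.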

	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	ngr	$t1,$mask
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	ngr	$i2,$mask
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	nr	$i3,$mask

	llgc	$s0,2($s0,$tbl)	# Te4[s0>>24]
	llgc	$t1,2($t1,$tbl)	# Te4[s0>>0]
	sll	$s0,24
	llgc	$t2,2($t2,$tbl)	# Te4[s0>>8]
	llgc	$t3,2($t3,$tbl)	# Te4[s0>>16]
	sll	$t2,8
	sll	$t3,16

	llgc	$i1,2($i1,$tbl)	# Te4[s1>>16]
	llgc	$s1,2($s1,$tbl)	# Te4[s1>>24]
	llgc	$i2,2($i2,$tbl)	# Te4[s1>>0]
	llgc	$i3,2($i3,$tbl)	# Te4[s1>>8]
	sll	$i1,16
	sll	$s1,24
	sll	$i3,8
	or	$s0,$i1
	or	$s1,$t1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	ngr	$i3,$mask
	nr	$s2,$mask

	sllg	$t1,$s3,`0+3`	# i0
	srlg	$ra,$s3,`8-3`	# i1
	ngr	$t1,$mask

	llgc	$i1,2($i1,$tbl)	# Te4[s2>>8]
	llgc	$i2,2($i2,$tbl)	# Te4[s2>>16]
	sll	$i1,8
	llgc	$s2,2($s2,$tbl)	# Te4[s2>>24]
	llgc	$i3,2($i3,$tbl)	# Te4[s2>>0]
	sll	$i2,16
	nr	$ra,$mask
	sll	$s2,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$t3,$i3

	srlg	$i3,$s3,`16-3`	# i2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	l	$t0,16($key)
	l	$t2,20($key)

	llgc	$i1,2($t1,$tbl)	# Te4[s3>>0]
	llgc	$i2,2($ra,$tbl)	# Te4[s3>>8]
	llgc	$i3,2($i3,$tbl)	# Te4[s3>>16]
	llgc	$s3,2($s3,$tbl)	# Te4[s3>>24]
	sll	$i2,8
	sll	$i3,16
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$i3
	or	$s3,$t3

	l${g}	$ra,15*$SIZE_T($sp)
	xr	$s0,$t0
	xr	$s1,$t2
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
___

$code.=<<___;
.type	AES_Td,\@object
.align	256
AES_Td:
___
&_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td

# void AES_decrypt(const unsigned char *inp, unsigned char *out,
#		 const AES_KEY *key) {
.globl	AES_decrypt
.type	AES_decrypt,\@function
AES_decrypt:
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Ldsoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Ldsoft:
___
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Td
	bras	$ra,_s390x_AES_decrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_decrypt,.-AES_decrypt

.type	_s390x_AES_decrypt,\@function
.align	16
_s390x_AES_decrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Ldec_loop
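	# The round loop below mirrors .Lenc_loop, using the doubled Td
	# entries with load offsets 0/3/2/1 for the Td0..Td3 rotations.
	# The last round differs: Td words do not contain the plain
	# inverse S-box as a byte, so a separate 256-byte Td4 table at
	# offset 2048 is used (and prefetched below before it's needed).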
.align	16
.Ldec_loop:
	srlg	$t1,$s0,`16-3`
	srlg	$t2,$s0,`8-3`
	sllg	$t3,$s0,`0+3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t1,$mask
	nr	$t2,$mask
	ngr	$t3,$mask

	sllg	$i1,$s1,`0+3`	# i0
	srlg	$i2,$s1,`16-3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	ngr	$i1,$mask
	nr	$s1,$mask
	nr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Td0[s0>>24]
	l	$t1,3($t1,$tbl)	# Td1[s0>>16]
	l	$t2,2($t2,$tbl)	# Td2[s0>>8]
	l	$t3,1($t3,$tbl)	# Td3[s0>>0]

	x	$s0,1($i1,$tbl)	# Td3[s1>>0]
	l	$s1,0($s1,$tbl)	# Td0[s1>>24]
	x	$t2,3($i2,$tbl)	# Td1[s1>>16]
	x	$t3,2($i3,$tbl)	# Td2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	sllg	$i2,$s2,`0+3`	# i1
	srlg	$i3,$s2,`16-3`
	srl	$s2,`24-3`
	nr	$i1,$mask
	ngr	$i2,$mask
	nr	$s2,$mask
	nr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	srlg	$t1,$s3,`16-3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	nr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Td2[s2>>8]
	x	$s1,1($i2,$tbl)	# Td3[s2>>0]
	l	$s2,0($s2,$tbl)	# Td0[s2>>24]
	x	$t3,3($i3,$tbl)	# Td1[s2>>16]

	sllg	$i3,$s3,`0+3`	# i2
	srl	$s3,`24-3`
	ngr	$i3,$mask
	nr	$s3,$mask

	xr	$s2,$t2
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,3($t1,$tbl)	# Td1[s3>>16]
	x	$s1,2($ra,$tbl)	# Td2[s3>>8]
	x	$s2,1($i3,$tbl)	# Td3[s3>>0]
	l	$s3,0($s3,$tbl)	# Td0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Ldec_loop
.align	16

	l	$t1,`2048+0`($tbl)	# prefetch Td4
	l	$t2,`2048+64`($tbl)
	l	$t3,`2048+128`($tbl)
	l	$i1,`2048+192`($tbl)
	llill	$mask,0xff

	srlg	$i3,$s0,24	# i0
	srlg	$t1,$s0,16
	srlg	$t2,$s0,8
	nr	$s0,$mask	# i3
	nr	$t1,$mask

	srlg	$i1,$s1,24
	nr	$t2,$mask
	srlg	$i2,$s1,16
	srlg	$ra,$s1,8
	nr	$s1,$mask	# i0
	nr	$i2,$mask
	nr	$ra,$mask

	llgc	$i3,2048($i3,$tbl)	# Td4[s0>>24]
	llgc	$t1,2048($t1,$tbl)	# Td4[s0>>16]
	llgc	$t2,2048($t2,$tbl)	# Td4[s0>>8]
	sll	$t1,16
	llgc	$t3,2048($s0,$tbl)	# Td4[s0>>0]
	sllg	$s0,$i3,24
	sll	$t2,8

	llgc	$s1,2048($s1,$tbl)	# Td4[s1>>0]
	llgc	$i1,2048($i1,$tbl)	# Td4[s1>>24]
	llgc	$i2,2048($i2,$tbl)	# Td4[s1>>16]
	sll	$i1,24
	llgc	$i3,2048($ra,$tbl)	# Td4[s1>>8]
	sll	$i2,16
	sll	$i3,8
	or	$s0,$s1
	or	$t1,$i1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,8	# i0
	srlg	$i2,$s2,24
	srlg	$i3,$s2,16
	nr	$s2,$mask	# i1
	nr	$i1,$mask
	nr	$i3,$mask
	llgc	$i1,2048($i1,$tbl)	# Td4[s2>>8]
	llgc	$s1,2048($s2,$tbl)	# Td4[s2>>0]
	llgc	$i2,2048($i2,$tbl)	# Td4[s2>>24]
	llgc	$i3,2048($i3,$tbl)	# Td4[s2>>16]
	sll	$i1,8
	sll	$i2,24
	or	$s0,$i1
	sll	$i3,16
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s3,16	# i0
	srlg	$i2,$s3,8	# i1
	srlg	$i3,$s3,24
	nr	$s3,$mask	# i2
	nr	$i1,$mask
	nr	$i2,$mask

	l${g}	$ra,15*$SIZE_T($sp)
	or	$s1,$t1
	l	$t0,16($key)
	l	$t1,20($key)

	llgc	$i1,2048($i1,$tbl)	# Td4[s3>>16]
	llgc	$i2,2048($i2,$tbl)	# Td4[s3>>8]
	sll	$i1,16
	llgc	$s2,2048($s3,$tbl)	# Td4[s3>>0]
	llgc	$s3,2048($i3,$tbl)	# Td4[s3>>24]
	sll	$i2,8
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$s3,$t3

	xr	$s0,$t0
	xr	$s1,$t1
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
___

$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
#		 AES_KEY *key) {
.globl	private_AES_set_encrypt_key
.type	private_AES_set_encrypt_key,\@function
.align	16
private_AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
	lghi	$t0,0
	cl${g}r	$inp,$t0
	je	.Lminus1
	cl${g}r	$key,$t0
	je	.Lminus1

	lghi	$t0,128
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,192
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,256
	clr	$bits,$t0
	je	.Lproceed
	lghi	%r2,-2
	br	%r14

.align	16
.Lproceed:
___
$code.=<<___ if (!$softonly);
	# convert bits to km code, [128,192,256]->[18,19,20]
	lhi	%r5,-128
	lhi	%r0,18
	ar	%r5,$bits
	srl	%r5,6
	ar	%r5,%r0

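	# The next block queries hardware capabilities: KMC with
	# function code 0 stores a 16-byte status word describing the
	# supported function codes into the parameter block at 16(%r15);
	# bit N set means function code N is available. The code tests
	# the bit matching the km code just computed in %r5 and falls
	# back to the software key schedule if it is clear.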
	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security assist
	jz	.Lekey_internal

	lghi	%r0,0		# query capability vector
	la	%r1,16($sp)
	.long	0xb92f0042	# kmc %r4,%r2

	llihh	%r1,0x8000
	srlg	%r1,%r1,0(%r5)
	ng	%r1,16($sp)
	jz	.Lekey_internal

	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
	stmg	%r0,%r1,0($key)
	lhi	%r0,192
	cr	$bits,%r0
	jl	1f
	lg	%r1,16($inp)
	stg	%r1,16($key)
	je	1f
	lg	%r1,24($inp)
	stg	%r1,24($key)
1:	st	$bits,236($key)	# save bits [for debugging purposes]
	lgr	$t0,%r5
	st	%r5,240($key)	# save km code
	lghi	%r2,0
	br	%r14
___
$code.=<<___;
.align	16
.Lekey_internal:
	stm${g}	%r4,%r13,4*$SIZE_T($sp)	# all non-volatile regs and $key

	larl	$tbl,AES_Te+2048

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	st	$s0,0($key)
	st	$s1,4($key)
	st	$s2,8($key)
	st	$s3,12($key)
	lghi	$t0,128
	cr	$bits,$t0
	jne	.Lnot128

	llill	$mask,0xff
	lghi	$t3,0		# i=0
	lghi	$rounds,10
	st	$rounds,240($key)

	llgfr	$t2,$s3		# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t2,$mask
	nr	$i1,$mask
	nr	$i2,$mask

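	# Standard AES-128 key expansion, one round key per iteration:
	#
	#	rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon[i]
	#	rk[5] = rk[1] ^ rk[4], and so on.
	#
	# The icm (insert characters under mask) instructions below
	# assemble SubWord(RotWord(temp)) byte by byte from the 256-byte
	# S-box stored at AES_Te+2048; rcon[] sits right after it.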
.align	16
.L128_loop:
	la	$t2,0($t2,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t2,2,0($t2)	# Te4[rk[3]>>0]<<8
	icm	$t2,4,0($i1)	# Te4[rk[3]>>8]<<16
	icm	$t2,8,0($i2)	# Te4[rk[3]>>16]<<24
	icm	$t2,1,0($i3)	# Te4[rk[3]>>24]
	x	$t2,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t2		# rk[4]=rk[0]^...
	xr	$s1,$s0		# rk[5]=rk[1]^rk[4]
	xr	$s2,$s1		# rk[6]=rk[2]^rk[5]
	xr	$s3,$s2		# rk[7]=rk[3]^rk[6]

	llgfr	$t2,$s3		# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	nr	$t2,$mask
	nr	$i1,$mask
	srlg	$i3,$s3,24
	nr	$i2,$mask

	st	$s0,16($key)
	st	$s1,20($key)
	st	$s2,24($key)
	st	$s3,28($key)
	la	$key,16($key)	# key+=4
	la	$t3,4($t3)	# i++
	brct	$rounds,.L128_loop
	lghi	$t0,10
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.Lnot128:
	llgf	$t0,16($inp)
	llgf	$t1,20($inp)
	st	$t0,16($key)
	st	$t1,20($key)
	lghi	$t0,192
	cr	$bits,$t0
	jne	.Lnot192

	llill	$mask,0xff
	lghi	$t3,0		# i=0
	lghi	$rounds,12
	st	$rounds,240($key)
	lghi	$rounds,8

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L192_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)	# Te4[rk[5]>>0]<<8
	icm	$t1,4,0($i1)	# Te4[rk[5]>>8]<<16
	icm	$t1,8,0($i2)	# Te4[rk[5]>>16]<<24
	icm	$t1,1,0($i3)	# Te4[rk[5]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1		# rk[6]=rk[0]^...
	xr	$s1,$s0		# rk[7]=rk[1]^rk[6]
	xr	$s2,$s1		# rk[8]=rk[2]^rk[7]
	xr	$s3,$s2		# rk[9]=rk[3]^rk[8]

	st	$s0,24($key)
	st	$s1,28($key)
	st	$s2,32($key)
	st	$s3,36($key)
	brct	$rounds,.L192_continue
	lghi	$t0,12
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L192_continue:
	lgr	$t1,$s3
	x	$t1,16($key)	# rk[10]=rk[4]^rk[9]
	st	$t1,40($key)
	x	$t1,20($key)	# rk[11]=rk[5]^rk[10]
	st	$t1,44($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,24($key)	# key+=6
	la	$t3,4($t3)	# i++
	j	.L192_loop

.align	16
.Lnot192:
	llgf	$t0,24($inp)
	llgf	$t1,28($inp)
	st	$t0,24($key)
	st	$t1,28($key)
	llill	$mask,0xff
	lghi	$t3,0		# i=0
	lghi	$rounds,14
	st	$rounds,240($key)
	lghi	$rounds,7

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L256_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)	# Te4[rk[7]>>0]<<8
	icm	$t1,4,0($i1)	# Te4[rk[7]>>8]<<16
	icm	$t1,8,0($i2)	# Te4[rk[7]>>16]<<24
	icm	$t1,1,0($i3)	# Te4[rk[7]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1		# rk[8]=rk[0]^...
	xr	$s1,$s0		# rk[9]=rk[1]^rk[8]
	xr	$s2,$s1		# rk[10]=rk[2]^rk[9]
	xr	$s3,$s2		# rk[11]=rk[3]^rk[10]
	st	$s0,32($key)
	st	$s1,36($key)
	st	$s2,40($key)
	st	$s3,44($key)
	brct	$rounds,.L256_continue
	lghi	$t0,14
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L256_continue:
	lgr	$t1,$s3		# temp=rk[11]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	llgc	$t1,0($t1)	# Te4[rk[11]>>0]
	icm	$t1,2,0($i1)	# Te4[rk[11]>>8]<<8
	icm	$t1,4,0($i2)	# Te4[rk[11]>>16]<<16
	icm	$t1,8,0($i3)	# Te4[rk[11]>>24]<<24
	x	$t1,16($key)	# rk[12]=rk[4]^...
	st	$t1,48($key)
	x	$t1,20($key)	# rk[13]=rk[5]^rk[12]
	st	$t1,52($key)
	x	$t1,24($key)	# rk[14]=rk[6]^rk[13]
	st	$t1,56($key)
	x	$t1,28($key)	# rk[15]=rk[7]^rk[14]
	st	$t1,60($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,32($key)	# key+=8
	la	$t3,4($t3)	# i++
	j	.L256_loop

.Lminus1:
	lghi	%r2,-1
	br	$ra
.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key

# void AES_set_decrypt_key(const unsigned char *in, int bits,
#		 AES_KEY *key) {
.globl	private_AES_set_decrypt_key
.type	private_AES_set_decrypt_key,\@function
.align	16
private_AES_set_decrypt_key:
	#st${g}	$key,4*$SIZE_T($sp)	# I rely on AES_set_encrypt_key to
	st${g}	$ra,14*$SIZE_T($sp)	# save non-volatile registers and $key!
	bras	$ra,_s390x_AES_set_encrypt_key
	#l${g}	$key,4*$SIZE_T($sp)
	l${g}	$ra,14*$SIZE_T($sp)
	ltgr	%r2,%r2
	bnzr	$ra
___
$code.=<<___ if (!$softonly);
	#l	$t0,240($key)
	lhi	$t1,16
	cr	$t0,$t1
	jl	.Lgo
	oill	$t0,0x80	# set "decrypt" bit
	st	$t0,240($key)
	br	$ra
___
$code.=<<___;
.align	16
.Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
	la	$i1,0($key)
	sllg	$i2,$rounds,4
	la	$i2,0($i2,$key)
	srl	$rounds,1
	lghi	$t1,-16

.align	16
.Linv:	lmg	$s0,$s1,0($i1)
	lmg	$s2,$s3,0($i2)
	stmg	$s0,$s1,0($i2)
	stmg	$s2,$s3,0($i1)
	la	$i1,16($i1)
	la	$i2,0($t1,$i2)
	brct	$rounds,.Linv
___
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
	llgf	$rounds,240($key)
	aghi	$rounds,-1
	sll	$rounds,2	# (rounds-1)*4
	llilh	$mask80,0x8080
	llilh	$mask1b,0x1b1b
	llilh	$maskfe,0xfefe
	oill	$mask80,0x8080
	oill	$mask1b,0x1b1b
	oill	$maskfe,0xfefe

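	# .Lmix applies InvMixColumns to each round-key word in place.
	# Multiplication by x in GF(2^8) (xtime) is done on four packed
	# bytes at once: the shift keeps only bits covered by maskfe,
	# and bytes whose top bit was set (mask80) are reduced by 0x1b.
	# In rough C, per 32-bit word w:
	#
	#	hi  = w & 0x80808080;
	#	tp2 = ((w << 1) & 0xfefefefe) ^ ((hi - (hi >> 7)) & 0x1b1b1b1b);
	#
	# tp4 and tp8 repeat the step; the rotate/xor tail combines them
	# into InvMixColumn(w) as annotated instruction by instruction.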
.align	16
.Lmix:	l	$s0,16($key)	# tp1
	lr	$s1,$s0
	ngr	$s1,$mask80
	srlg	$t1,$s1,7
	slr	$s1,$t1
	nr	$s1,$mask1b
	sllg	$t1,$s0,1
	nr	$t1,$maskfe
	xr	$s1,$t1		# tp2

	lr	$s2,$s1
	ngr	$s2,$mask80
	srlg	$t1,$s2,7
	slr	$s2,$t1
	nr	$s2,$mask1b
	sllg	$t1,$s1,1
	nr	$t1,$maskfe
	xr	$s2,$t1		# tp4

	lr	$s3,$s2
	ngr	$s3,$mask80
	srlg	$t1,$s3,7
	slr	$s3,$t1
	nr	$s3,$mask1b
	sllg	$t1,$s2,1
	nr	$t1,$maskfe
	xr	$s3,$t1		# tp8

	xr	$s1,$s0		# tp2^tp1
	xr	$s2,$s0		# tp4^tp1
	rll	$s0,$s0,24	# = ROTATE(tp1,8)
	xr	$s2,$s3		# ^=tp8
	xr	$s0,$s1		# ^=tp2^tp1
	xr	$s1,$s3		# tp2^tp1^tp8
	xr	$s0,$s2		# ^=tp4^tp1^tp8
	rll	$s1,$s1,8
	rll	$s2,$s2,16
	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
	rll	$s3,$s3,24
	xr	$s0,$s2		# ^= ROTATE(tp8^tp4^tp1,16)
	xr	$s0,$s3		# ^= ROTATE(tp8,8)

	st	$s0,16($key)
	la	$key,4($key)
	brct	$rounds,.Lmix

	lm${g}	%r6,%r13,6*$SIZE_T($sp)	# as was saved by AES_set_encrypt_key!
	lghi	%r2,0
	br	$ra
.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
___

########################################################################
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#	 size_t length, const AES_KEY *key,
#	 unsigned char *ivec, const int enc)
{
my $inp="%r2";
my $out="%r4";	# length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function
.align	16
AES_cbc_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
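# Hardware path: KMC (cipher message with chaining) implements CBC
# directly. Its parameter block is the 16-byte chaining value (the IV,
# updated in place) followed by the key, which is why both are copied
# to the stack below. "Partial completion" means the CPU may stop
# mid-operation with condition code 3 and the registers advanced; the
# `brc 1,.-4` after the opcode simply re-issues the instruction until
# it completes.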
$code.=<<___ if (!$softonly);
	lhi	%r0,16
	cl	%r0,240($key)
	jh	.Lcbc_software

	lg	%r0,0($ivp)	# copy ivec
	lg	%r1,8($ivp)
	stmg	%r0,%r1,16($sp)
	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
	stmg	%r0,%r1,32($sp)
	lmg	%r0,%r1,16($key)
	stmg	%r0,%r1,48($sp)
	l	%r0,240($key)	# load kmc code
	lghi	$key,15		# res=len%16, len-=res;
	ngr	$key,$len
	sl${g}r	$len,$key
	la	%r1,16($sp)	# parameter block - ivec || key
	jz	.Lkmc_truncated
	.long	0xb92f0042	# kmc %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"
	ltr	$key,$key
	jnz	.Lkmc_truncated
.Lkmc_done:
	lmg	%r0,%r1,16($sp)	# copy ivec to caller
	stg	%r0,0($ivp)
	stg	%r1,8($ivp)
	br	$ra
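	# Tail handling for lengths that are not a multiple of 16: the
	# residue is zero-padded into a 16-byte stack slot and pushed
	# through kmc as one extra block. The mvc length is patched at
	# run time: ex executes the target mvc with its length byte
	# replaced from the register, and mvc encodes length-1, hence
	# the ahi ...,-1 below.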
.align	16
.Lkmc_truncated:
	ahi	$key,-1		# it's the way it's encoded in mvc
	tmll	%r0,0x80
	jnz	.Lkmc_truncated_dec
	lghi	%r1,0
	stg	%r1,16*$SIZE_T($sp)
	stg	%r1,16*$SIZE_T+8($sp)
	bras	%r1,1f
	mvc	16*$SIZE_T(1,$sp),0($inp)
1:	ex	$key,0(%r1)
	la	%r1,16($sp)	# restore parameter block
	la	$inp,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	j	.Lkmc_done
.align	16
.Lkmc_truncated_dec:
	st${g}	$out,4*$SIZE_T($sp)
	la	$out,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	l${g}	$out,4*$SIZE_T($sp)
	bras	%r1,2f
	mvc	0(1,$out),16*$SIZE_T($sp)
2:	ex	$key,0(%r1)
	j	.Lkmc_done
.align	16
.Lcbc_software:
___
$code.=<<___;
	stm${g}	$key,$ra,5*$SIZE_T($sp)
	lhi	%r0,0
	cl	%r0,`$stdframe+$SIZE_T-4`($sp)
	je	.Lcbc_decrypt

	larl	$tbl,AES_Te

	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	llgf	$s3,12($ivp)

	lghi	$t0,16
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
.Lcbc_enc_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	x	$s0,0($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	la	$inp,16($inp)
	la	$out,16($out)
	lghi	$t0,16
	lt${g}r	$len,$len
	jz	.Lcbc_enc_done
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
	j	.Lcbc_enc_loop
.align	16
.Lcbc_enc_done:
	l${g}	$ivp,6*$SIZE_T($sp)
	st	$s0,0($ivp)
	st	$s1,4($ivp)
	st	$s2,8($ivp)
	st	$s3,12($ivp)

	lm${g}	%r7,$ra,7*$SIZE_T($sp)
	br	$ra

.align	16
.Lcbc_enc_tail:
	aghi	$len,15
	lghi	$t0,0
	stg	$t0,16*$SIZE_T($sp)
	stg	$t0,16*$SIZE_T+8($sp)
	bras	$t1,3f
	mvc	16*$SIZE_T(1,$sp),0($inp)
3:	ex	$len,0($t1)
	lghi	$len,0
	la	$inp,16*$SIZE_T($sp)
	j	.Lcbc_enc_loop

.align	16
.Lcbc_decrypt:
	larl	$tbl,AES_Td

	lg	$t0,0($ivp)
	lg	$t1,8($ivp)
	stmg	$t0,$t1,16*$SIZE_T($sp)

.Lcbc_dec_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_decrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	sllg	$s0,$s0,32
	sllg	$s2,$s2,32
	lr	$s0,$s1
	lr	$s2,$s3

	lg	$t0,0($inp)
	lg	$t1,8($inp)
	xg	$s0,16*$SIZE_T($sp)
	xg	$s2,16*$SIZE_T+8($sp)
	lghi	$s1,16
	sl${g}r	$len,$s1
	brc	4,.Lcbc_dec_tail	# if borrow
	brc	2,.Lcbc_dec_done	# if zero
	stg	$s0,0($out)
	stg	$s2,8($out)
	stmg	$t0,$t1,16*$SIZE_T($sp)

	la	$inp,16($inp)
	la	$out,16($out)
	j	.Lcbc_dec_loop

.Lcbc_dec_done:
	stg	$s0,0($out)
	stg	$s2,8($out)
.Lcbc_dec_exit:
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	stmg	$t0,$t1,0($ivp)

	br	$ra

.align	16
.Lcbc_dec_tail:
	aghi	$len,15
	stg	$s0,16*$SIZE_T($sp)
	stg	$s2,16*$SIZE_T+8($sp)
	bras	$s1,4f
	mvc	0(1,$out),16*$SIZE_T($sp)
4:	ex	$len,0($s1)
	j	.Lcbc_dec_exit
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
########################################################################
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
#	 size_t blocks, const AES_KEY *key,
#	 const unsigned char *ivec)
{
my $inp="%r2";
my $out="%r4";	# blocks and out are swapped
my $len="%r3";
my $key="%r5";	my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";

$code.=<<___;
.globl	AES_ctr32_encrypt
.type	AES_ctr32_encrypt,\@function
.align	16
AES_ctr32_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	$len,$len	# safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lctr32_software

	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	slgr	$out,$inp
	la	%r1,0($key)	# %r1 is permanent copy of $key
	lg	$iv0,0($ivp)	# load ivec
	lg	$ivp,8($ivp)

	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16	# guarantee at least 256-bytes buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1		# align at page boundary
	slgr	$fp,$s0		# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	$sp,1024($s0)	# alloca
	srlg	$fp,$fp,4	# convert bytes to blocks, minimum 16
	st${g}	$s2,0($sp)	# back-chain
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lctr32_hw_switch	# not zero, no borrow
	algr	$fp,$len	# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)

.Lctr32_hw_switch:
___
$code.=<<___ if (0);	######### kmctr code was measured to be ~12% slower
	larl	$s0,OPENSSL_s390xcap_P
	lg	$s0,8($s0)
	tmhh	$s0,0x0004	# check for message-security-assist-4
	jz	.Lctr32_km_loop

	llgfr	$s0,%r0
	lgr	$s1,%r1
	lghi	%r0,0
	la	%r1,16($sp)
	.long	0xb92d2042	# kmctr %r4,%r2,%r2

	llihh	%r0,0x8000	# check if kmctr supports the function code
	srlg	%r0,%r0,0($s0)
	ng	%r0,16($sp)
	lgr	%r0,$s0
	lgr	%r1,$s1
	jz	.Lctr32_km_loop

####### kmctr code
	algr	$out,$inp	# restore $out
	lgr	$s1,$len	# $s1 undertakes $len
	j	.Lctr32_kmctr_loop
.align	16
.Lctr32_kmctr_loop:
	la	$s2,16($sp)
	lgr	$s3,$fp
.Lctr32_kmctr_prepare:
	stg	$iv0,0($s2)
	stg	$ivp,8($s2)
	la	$s2,16($s2)
	ahi	$ivp,1		# 32-bit increment, preserves upper half
	brct	$s3,.Lctr32_kmctr_prepare

	#la	$inp,0($inp)	# inp
	sllg	$len,$fp,4	# len
	#la	$out,0($out)	# out
	la	$s2,16($sp)	# iv
	.long	0xb92da042	# kmctr $out,$s2,$inp
	brc	1,.-4		# pay attention to "partial completion"

	slgr	$s1,$fp
	brc	1,.Lctr32_kmctr_loop	# not zero, no borrow
	algr	$fp,$s1
	lghi	$s1,0
	brc	4+1,.Lctr32_kmctr_loop	# not zero

	l${g}	$sp,0($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
___
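# km-based CTR emulation: a vector of consecutive counter blocks is
# materialized in the stack buffer, encrypted in place with one km
# (ECB) pass, and then XORed against the input. Only the low 32 bits
# of the counter are incremented, as the CTR32 contract requires.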
$code.=<<___;
.Lctr32_km_loop:
	la	$s2,16($sp)
	lgr	$s3,$fp
.Lctr32_km_prepare:
	stg	$iv0,0($s2)
	stg	$ivp,8($s2)
	la	$s2,16($s2)
	ahi	$ivp,1		# 32-bit increment, preserves upper half
	brct	$s3,.Lctr32_km_prepare

	la	$s0,16($sp)	# inp
	sllg	$s1,$fp,4	# len
	la	$s2,16($sp)	# out
	.long	0xb92e00a8	# km %r10,%r8
	brc	1,.-4		# pay attention to "partial completion"

	la	$s2,16($sp)
	lgr	$s3,$fp
	slgr	$s2,$inp
.Lctr32_km_xor:
	lg	$s0,0($inp)
	lg	$s1,8($inp)
	xg	$s0,0($s2,$inp)
	xg	$s1,8($s2,$inp)
	stg	$s0,0($out,$inp)
	stg	$s1,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lctr32_km_xor

	slgr	$len,$fp
	brc	1,.Lctr32_km_loop	# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lctr32_km_loop	# not zero

	l${g}	$s0,0($sp)
	l${g}	$s1,$SIZE_T($sp)
	la	$s2,16($sp)
.Lctr32_km_zap:
	stg	$s0,0($s2)
	stg	$s0,8($s2)
	la	$s2,16($s2)
	brct	$s1,.Lctr32_km_zap

	la	$sp,0($s0)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lctr32_software:
___
$code.=<<___;
	stm${g}	$key,$ra,5*$SIZE_T($sp)
	sl${g}r	$inp,$out
	larl	$tbl,AES_Te
	llgf	$t1,12($ivp)

.Lctr32_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	lgr	$s3,$t1
	st	$t1,16*$SIZE_T($sp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lm${g}	$inp,$ivp,2*$SIZE_T($sp)
	llgf	$t1,16*$SIZE_T($sp)
	x	$s0,0($inp,$out)
	x	$s1,4($inp,$out)
	x	$s2,8($inp,$out)
	x	$s3,12($inp,$out)
	stm	$s0,$s3,0($out)

	la	$out,16($out)
	ahi	$t1,1		# 32-bit increment
	brct	$len,.Lctr32_loop

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
___
}

########################################################################
# void AES_xts_encrypt(const char *inp,char *out,size_t len,
#	 const AES_KEY *key1, const AES_KEY *key2,
#	 const unsigned char iv[16]);
#
{
my $inp="%r2";
my $out="%r4";	# len and out are swapped
my $len="%r3";
my $key1="%r5";	# $i1
my $key2="%r6";	# $i2
my $fp="%r7";	# $i3
my $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...

$code.=<<___;
.type	_s390x_xts_km,\@function
.align	16
_s390x_xts_km:
___
$code.=<<___ if(1);
	llgfr	$s0,%r0		# put aside the function code
	lghi	$s1,0x7f
	nr	$s1,%r0
	lghi	%r0,0		# query capability vector
	la	%r1,$tweak-16($sp)
	.long	0xb92e0042	# km %r4,%r2
	llihh	%r1,0x8000
	srlg	%r1,%r1,32($s1)	# check for 32+function code
	ng	%r1,$tweak-16($sp)
	lgr	%r0,$s0		# restore the function code
	la	%r1,0($key1)	# restore $key1
	jz	.Lxts_km_vanilla

	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
	algr	$out,$inp

	oill	%r0,32		# switch to xts function code
	aghi	$s1,-18		#
	sllg	$s1,$s1,3	# (function code - 18)*8, 0 or 16
	la	%r1,$tweak-16($sp)
	slgr	%r1,$s1		# parameter block position
	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
	stmg	$s0,$s3,0(%r1)	# and copy it to parameter block.
				# yes, it contains junk and overlaps
				# with the tweak in 128-bit case.
				# it's done to avoid conditional
				# branch.
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value

	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"

	lrvg	$s0,$tweak+0($sp)	# load the last tweak
	lrvg	$s1,$tweak+8($sp)
	stmg	%r0,%r3,$tweak-32($sp)	# wipe copy of the key

	nill	%r0,0xffdf	# switch back to original function code
	la	%r1,0($key1)	# restore pointer to $key1
	slgr	$out,$inp

	llgc	$len,2*$SIZE_T-1($sp)
	nill	$len,0x0f	# $len%=16
	br	$ra

.align	16
.Lxts_km_vanilla:
___
$code.=<<___;
	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16	# guarantee at least 256-bytes buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1		# align at page boundary
	slgr	$fp,$s0		# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	$sp,1024($s0)	# alloca
	nill	$fp,0xfff0	# round to 16*n
	st${g}	$s2,0($sp)	# back-chain
	nill	$len,0xfff0	# redundant
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lxts_km_go	# not zero, no borrow
	algr	$fp,$len	# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)

.Lxts_km_go:
	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
	lrvg	$s1,$tweak+8($s2)

	la	$s2,16($sp)	# vector of ascending tweak values
	slgr	$s2,$inp
	srlg	$s3,$fp,4
	j	.Lxts_km_start

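	# Tweak update, used throughout: the 128-bit tweak is kept as
	# two little-endian 64-bit halves and multiplied by x in
	# GF(2^128) with the xts polynomial. The whole value is shifted
	# left one bit (the algr/alcgr add-with-carry pair), and if the
	# bit shifted out of the top was set, 0x87 is xored into the
	# lowest byte (srag broadcasts that bit, ngr masks the constant).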
.Lxts_km_loop:
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_prepare:
	lghi	$i1,0x87
	srag	$i2,$s1,63	# broadcast upper bit
	ngr	$i1,$i2		# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
.Lxts_km_start:
	lrvgr	$i1,$s0		# flip byte order
	lrvgr	$i2,$s1
	stg	$i1,0($s2,$inp)
	stg	$i2,8($s2,$inp)
	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_prepare

	slgr	$inp,$fp	# rewind $inp
	la	$s2,0($out,$inp)
	lgr	$s3,$fp
	.long	0xb92e00aa	# km $s2,$s2
	brc	1,.-4		# pay attention to "partial completion"

	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_xor:
	lg	$i1,0($out,$inp)
	lg	$i2,8($out,$inp)
	xg	$i1,0($s2,$inp)
	xg	$i2,8($s2,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_xor

	slgr	$len,$fp
	brc	1,.Lxts_km_loop	# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lxts_km_loop	# not zero

	l${g}	$i1,0($sp)	# back-chain
	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
	la	$i2,16($sp)
	srlg	$fp,$fp,4
.Lxts_km_zap:
	stg	$i1,0($i2)
	stg	$i1,8($i2)
	la	$i2,16($i2)
	brct	$fp,.Lxts_km_zap

	la	$sp,0($i1)
	llgc	$len,2*$SIZE_T-1($i1)
	nill	$len,0x0f	# $len%=16
	bzr	$ra

	# generate one more tweak...
	lghi	$i1,0x87
	srag	$i2,$s1,63	# broadcast upper bit
	ngr	$i1,$i2		# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1

	ltr	$len,$len	# clear zero flag
	br	$ra
.size	_s390x_xts_km,.-_s390x_xts_km

.globl	AES_xts_encrypt
.type	AES_xts_encrypt,\@function
.align	16
AES_xts_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	srag	$len,$len,4	# formally wrong, because it expands
				# the sign bit, but who can afford
				# asking to process more than 2^63-1
				# bytes? I use it, because it sets
				# condition code...
	bcr	8,$ra		# abort if zero (i.e. less than 16)
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_enc_software

	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	sllg	$len,$len,4	# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)	# $key2 is not needed anymore
	.long	0xb92e00aa	# km $s2,$s2, generate the tweak
	brc	1,.-4		# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)	# $key1 is not needed anymore
	bras	$ra,_s390x_xts_km
	jz	.Lxts_enc_km_done

	aghi	$inp,-16	# take one step back
	la	$i3,0($out,$inp)	# put aside real $out
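	# Ciphertext stealing: the final partial block borrows the tail
	# of the last full ciphertext block. The loop below swaps the
	# remaining plaintext bytes with the head of that ciphertext
	# block in place; the combined block is then encrypted once more
	# with the last tweak, which _s390x_xts_km left in s0/s1 in
	# little-endian form.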
.Lxts_enc_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_km_steal

	la	$s2,0($i3)
	lghi	$s3,16
	lrvgr	$i1,$s0		# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($s2)
	xg	$i2,8($s2)
	stg	$i1,0($s2)
	stg	$i2,8($s2)
	.long	0xb92e00aa	# km $s2,$s2
	brc	1,.-4		# can this happen?
	lrvgr	$i1,$s0		# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($i3)
	xg	$i2,8($i3)
	stg	$i1,0($i3)
	stg	$i2,8($i3)

.Lxts_enc_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_enc_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)	# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	j	.Lxts_enc_enter

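	# Software path: each iteration advances the tweak in its
	# little-endian form (the same GF(2^128) step as above), stores
	# it back, and splits it into four big-endian 32-bit words to
	# match the register layout expected by _s390x_AES_encrypt.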
.align	16
.Lxts_enc_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1		# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
	la	$inp,16($inp)	# $inp+=16
.Lxts_enc_enter:
	x	$s0,0($inp)	# ^=*($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	brct${g}	$len,.Lxts_enc_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f	# $len%16
	jz	.Lxts_enc_done

	la	$i3,0($inp,$out)	# put aside real $out
.Lxts_enc_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_steal
	la	$out,0($i3)	# restore real $out

	# generate last tweak...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1		# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($out)	# ^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,`$tweak+0`($sp)	# ^=tweak
	x	$s1,`$tweak+4`($sp)
	x	$s2,`$tweak+8`($sp)
	x	$s3,`$tweak+12`($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

.Lxts_enc_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_encrypt,.-AES_xts_encrypt
___
# void AES_xts_decrypt(const char *inp,char *out,size_t len,
#	 const AES_KEY *key1, const AES_KEY *key2,
#	 const unsigned char iv[16]);
#
$code.=<<___;
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,\@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	aghi	$len,-16
	bcr	4,$ra		# abort if less than zero. formally
				# wrong, because $len is unsigned,
				# but who can afford asking to
				# process more than 2^63-1 bytes?
	tmll	$len,0x0f
	jnz	.Lxts_dec_proceed
	aghi	$len,16
.Lxts_dec_proceed:
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software

	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	nill	$len,0xfff0	# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)	# $key2 is not needed past this point
	.long	0xb92e00aa	# km $s2,$s2, generate the tweak
	brc	1,.-4		# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)	# $key1 is not needed anymore

	ltgr	$len,$len
	jz	.Lxts_dec_km_short
	bras	$ra,_s390x_xts_km
	jz	.Lxts_dec_km_done

	lrvgr	$s2,$s0		# make copy in reverse byte order
	lrvgr	$s3,$s1
	j	.Lxts_dec_km_2ndtweak

.Lxts_dec_km_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f	# $len%=16
	lrvg	$s0,$tweak+0($sp)	# load the tweak
	lrvg	$s1,$tweak+8($sp)
	lrvgr	$s2,$s0		# make copy in reverse byte order
	lrvgr	$s3,$s1

.Lxts_dec_km_2ndtweak:
	lghi	$i1,0x87
	srag	$i2,$s1,63	# broadcast upper bit
	ngr	$i1,$i2		# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
	lrvgr	$i1,$s0		# flip byte order
	lrvgr	$i2,$s1

	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i2,0($out,$inp)
	lghi	$i3,16
	.long	0xb92e0066	# km $i2,$i2
	brc	1,.-4		# can this happen?
	lrvgr	$i1,$s0
	lrvgr	$i2,$s1
	xg	$i1,0($out,$inp)
	xg	$i2,8($out,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_km_steal

	lgr	$s0,$s2
	lgr	$s1,$s3
	xg	$s0,0($i3)
	xg	$s1,8($i3)
	stg	$s0,0($i3)
	stg	$s1,8($i3)
	la	$s0,0($i3)
	lghi	$s1,16
	.long	0xb92e0088	# km $s0,$s0
	brc	1,.-4		# can this happen?
	xg	$s2,0($i3)
	xg	$s3,8($i3)
	stg	$s2,0($i3)
	stg	$s3,8($i3)
.Lxts_dec_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_dec_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	srlg	$len,$len,4
	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)	# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	larl	$tbl,AES_Td
	lt${g}r	$len,$len
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	jz	.Lxts_dec_short
	j	.Lxts_dec_enter

.align	16
.Lxts_dec_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1		# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
.Lxts_dec_enter:
	x	$s0,0($inp)	# tweak^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$inp,16($inp)
	brct${g}	$len,.Lxts_dec_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f	# $len%16
	jz	.Lxts_dec_done

	# generate pair of tweaks...
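	# XTS decryption with a partial final block needs two tweaks in
	# reverse order: the last full block is decrypted with tweak
	# n+1, ciphertext stealing then rearranges the bytes, and the
	# combined block is decrypted with tweak n (saved below as the
	# "1st" tweak).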
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$i2,$s1		# flip byte order
	lrvgr	$i3,$s3
	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak

.align	16
.Lxts_dec_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f	# $len%16
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,$s3,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1		# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak-16+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($inp)	# tweak_the_2nd^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
	x	$s1,$tweak-16+4($sp)
	x	$s2,$tweak-16+8($sp)
	x	$s3,$tweak-16+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_steal
	la	$out,0($i3)	# restore real $out

	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
	x	$s0,0($out)	# tweak^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)
	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
	stg	$sp,$tweak-16+8($sp)
.Lxts_dec_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_decrypt,.-AES_xts_decrypt
___
}
$code.=<<___;
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
.comm	OPENSSL_s390xcap_P,16,8
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;	# force flush