#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro (at] fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and issued instruction [in this case
# load value from memory is critical] has to complete before execution
# flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support. It's basically a "teaser," as
# it can and should be improved in several ways. Most notably support
# for CBC is not utilized, nor multiple blocks are ever processed.
# Then software key schedule can be postponed till hardware support
# detection... Performance improvement over assembler is reportedly
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
# support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# critical path is not long enough.
# Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
# Unlike previous version hardware support detection takes place only
# at the moment of key schedule setup, which is denoted in key->rounds.
# This is done, because deferred key setup can't be made MT-safe, not
# for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives incredible performance improvement,
# it was measured to be ~6.6x. It's less than previously mentioned 8x,
# because software implementation was optimized.

# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over "generic" counter mode routine relying
# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
# to the fact that exact throughput value depends on current stack
# frame alignment within 4KB page. In worst case you get ~75% of the
# maximum, but *on average* it would be as much as ~98%. Meaning that
# worst case is unlikely, it's like hitting ravine on plateau.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 2x better than code generated by gcc 4.3.

# December 2010.
#
# Add support for z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...

# February 2011.
#
# Add AES_xts_[en|de]crypt.
# This includes support for z196 km-xts-aes
# instructions, which deliver ~70% improvement at 8KB block size over
# vanilla km-based code, 37% - at most like 512-bytes block size.

# $flavour selects the ABI: anything matching /3[12]/ is a 31-bit
# build (4-byte size_t, no "g" suffix on load/store mnemonics),
# everything else is 64-bit z/Architecture.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Skip non-filename arguments; the first "word.ext"-shaped argument is
# the output file. Fail loudly if it cannot be opened (the original
# two-arg unchecked open silently discarded all generated code when
# $output was empty or unopenable).
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,'>',$output or die "can't open $output: $!";

$softonly=0;	# allow hardware support

# Register allocation. Note the deliberate aliasing: $t0/$mask share
# %r0, $t2/$inp share %r2, and $t3/$out/$bits share %r3 — the code
# below relies on which meaning is live at any given point.
$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

$stdframe=16*$SIZE_T+4*8;

# Append each argument to $code as a doubled .long (every table word
# is emitted twice, which is how the 2KB compressed S-box layout is
# built). The former empty prototype was dropped: it was never
# honoured anyway because every call site uses the &_data_word(...)
# form, which bypasses prototypes.
sub _data_word
{ my $i;
    while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}

$code=<<___;
.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
&_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661,
	# AES_Te continued: remaining Te0 words (each emitted twice by
	# _data_word to build the compressed 2KB encryption S-box table).
	# Constant data — values must not be altered.
	0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	# Tail of the Te0 word table, then the start of the plain Te4
	# S-box (256 bytes). Constant data — values must not be altered.
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
#		const AES_KEY *key) {
# Single-block encrypt. If !$softonly, a hardware path is emitted
# first: key->rounds (offset 240) below 16 means it holds a km
# function code rather than a round count, so the block is handed to
# the km instruction; otherwise fall through to the software path.
.globl	AES_encrypt
.type	AES_encrypt,\@function
AES_encrypt:
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Lesoft:
___
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_encrypt,.-AES_encrypt

# Software single-block encrypt core. In: $s0-$s3 = big-endian state
# words, $key = round keys, $tbl = AES_Te. Out: encrypted state in
# $s0-$s3. Clobbers $t0-$t3, $i1-$i3, $ra; saves/restores $ra on stack.
# Table indices are pre-shifted by 3 (llill $mask,`0xff<<3`) because
# each Te word is stored twice (8 bytes per entry).
.type	_s390x_AES_encrypt,\@function
.align	16
_s390x_AES_encrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Lenc_loop
.align	16
.Lenc_loop:
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	ngr	$t1,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	ngr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Te0[s0>>24]
	l	$t1,1($t1,$tbl)	# Te3[s0>>0]
	l	$t2,2($t2,$tbl)	# Te2[s0>>8]
	l	$t3,3($t3,$tbl)	# Te1[s0>>16]

	x	$s0,3($i1,$tbl)	# Te1[s1>>16]
	l	$s1,0($s1,$tbl)	# Te0[s1>>24]
	x	$t2,1($i2,$tbl)	# Te3[s1>>0]
	x	$t3,2($i3,$tbl)	# Te2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	nr	$s2,$mask
	ngr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	sllg	$t1,$s3,`0+3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	ngr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Te2[s2>>8]
	x	$s1,3($i2,$tbl)	# Te1[s2>>16]
	l	$s2,0($s2,$tbl)	# Te0[s2>>24]
	x	$t3,1($i3,$tbl)	# Te3[s2>>0]

	srlg	$i3,$s3,`16-3`	# i2
	xr	$s2,$t2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,1($t1,$tbl)	# Te3[s3>>0]
	x	$s1,2($ra,$tbl)	# Te2[s3>>8]
	x	$s2,3($i3,$tbl)	# Te1[s3>>16]
	l	$s3,0($s3,$tbl)	# Te0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Lenc_loop
.align	16

	# Final round: byte-wise Te4 S-box lookups (Te4 byte lives at
	# offset 2 within each 8-byte doubled Te entry), recombined into
	# the four 32-bit state words.
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	ngr	$t1,$mask
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	ngr	$i2,$mask
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	nr	$i3,$mask

	llgc	$s0,2($s0,$tbl)	# Te4[s0>>24]
	llgc	$t1,2($t1,$tbl)	# Te4[s0>>0]
	sll	$s0,24
	llgc	$t2,2($t2,$tbl)	# Te4[s0>>8]
	llgc	$t3,2($t3,$tbl)	# Te4[s0>>16]
	sll	$t2,8
	sll	$t3,16

	llgc	$i1,2($i1,$tbl)	# Te4[s1>>16]
	llgc	$s1,2($s1,$tbl)	# Te4[s1>>24]
	llgc	$i2,2($i2,$tbl)	# Te4[s1>>0]
	llgc	$i3,2($i3,$tbl)	# Te4[s1>>8]
	sll	$i1,16
	sll	$s1,24
	sll	$i3,8
	or	$s0,$i1
	or	$s1,$t1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	ngr	$i3,$mask
	nr	$s2,$mask

	sllg	$t1,$s3,`0+3`	# i0
	srlg	$ra,$s3,`8-3`	# i1
	ngr	$t1,$mask

	llgc	$i1,2($i1,$tbl)	# Te4[s2>>8]
	llgc	$i2,2($i2,$tbl)	# Te4[s2>>16]
	sll	$i1,8
	llgc	$s2,2($s2,$tbl)	# Te4[s2>>24]
	llgc	$i3,2($i3,$tbl)	# Te4[s2>>0]
	sll	$i2,16
	nr	$ra,$mask
	sll	$s2,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$t3,$i3

	srlg	$i3,$s3,`16-3`	# i2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	l	$t0,16($key)
	l	$t2,20($key)

	llgc	$i1,2($t1,$tbl)	# Te4[s3>>0]
	llgc	$i2,2($ra,$tbl)	# Te4[s3>>8]
	llgc	$i3,2($i3,$tbl)	# Te4[s3>>16]
	llgc	$s3,2($s3,$tbl)	# Te4[s3>>24]
	sll	$i2,8
	sll	$i3,16
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$i3
	or	$s3,$t3

	# Final AddRoundKey and return to the caller saved at entry.
	l${g}	$ra,15*$SIZE_T($sp)
	xr	$s0,$t0
	xr	$s1,$t2
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
___

# Decryption table AES_Td, same doubled-word layout as AES_Te.
$code.=<<___;
.type	AES_Td,\@object
.align	256
AES_Td:
___
&_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f,
	# AES_Td continued. Constant data — values must not be altered.
	0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	# Tail of the Td0 word table, then the start of the plain Td4
	# inverse S-box (256 bytes). Constant data — must not be altered.
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte
0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td

# void AES_decrypt(const unsigned char *inp, unsigned char *out,
#		const AES_KEY *key) {
# Single-block decrypt, mirror of AES_encrypt: hardware km path when
# key->rounds holds a km code (<16), software path otherwise.
.globl	AES_decrypt
.type	AES_decrypt,\@function
AES_decrypt:
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Ldsoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Ldsoft:
___
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Td
	bras	$ra,_s390x_AES_decrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_decrypt,.-AES_decrypt

# Software single-block decrypt core; same register contract as
# _s390x_AES_encrypt but indexes AES_Td, with Td4 at offset 2048.
.type	_s390x_AES_decrypt,\@function
.align	16
_s390x_AES_decrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Ldec_loop
.align	16
.Ldec_loop:
	srlg	$t1,$s0,`16-3`
	srlg	$t2,$s0,`8-3`
	sllg	$t3,$s0,`0+3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t1,$mask
	nr	$t2,$mask
	ngr	$t3,$mask

	sllg	$i1,$s1,`0+3`	# i0
	srlg	$i2,$s1,`16-3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	ngr	$i1,$mask
	nr	$s1,$mask
	nr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Td0[s0>>24]
	l	$t1,3($t1,$tbl)	# Td1[s0>>16]
	l	$t2,2($t2,$tbl)	# Td2[s0>>8]
	l	$t3,1($t3,$tbl)	# Td3[s0>>0]

	x	$s0,1($i1,$tbl)	#
Td3[s1>>0]
	l	$s1,0($s1,$tbl)	# Td0[s1>>24]
	x	$t2,3($i2,$tbl)	# Td1[s1>>16]
	x	$t3,2($i3,$tbl)	# Td2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	sllg	$i2,$s2,`0+3`	# i1
	srlg	$i3,$s2,`16-3`
	srl	$s2,`24-3`
	nr	$i1,$mask
	ngr	$i2,$mask
	nr	$s2,$mask
	nr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	srlg	$t1,$s3,`16-3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	nr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Td2[s2>>8]
	x	$s1,1($i2,$tbl)	# Td3[s2>>0]
	l	$s2,0($s2,$tbl)	# Td0[s2>>24]
	x	$t3,3($i3,$tbl)	# Td1[s2>>16]

	sllg	$i3,$s3,`0+3`	# i2
	srl	$s3,`24-3`
	ngr	$i3,$mask
	nr	$s3,$mask

	xr	$s2,$t2
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,3($t1,$tbl)	# Td1[s3>>16]
	x	$s1,2($ra,$tbl)	# Td2[s3>>8]
	x	$s2,1($i3,$tbl)	# Td3[s3>>0]
	l	$s3,0($s3,$tbl)	# Td0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Ldec_loop
.align	16

	# Final round: byte-wise Td4 lookups (Td4 starts at $tbl+2048);
	# the first four loads only prefetch Td4 cache lines.
	l	$t1,`2048+0`($tbl)	# prefetch Td4
	l	$t2,`2048+64`($tbl)
	l	$t3,`2048+128`($tbl)
	l	$i1,`2048+192`($tbl)
	llill	$mask,0xff

	srlg	$i3,$s0,24	# i0
	srlg	$t1,$s0,16
	srlg	$t2,$s0,8
	nr	$s0,$mask	# i3
	nr	$t1,$mask

	srlg	$i1,$s1,24
	nr	$t2,$mask
	srlg	$i2,$s1,16
	srlg	$ra,$s1,8
	nr	$s1,$mask	# i0
	nr	$i2,$mask
	nr	$ra,$mask

	llgc	$i3,2048($i3,$tbl)	# Td4[s0>>24]
	llgc	$t1,2048($t1,$tbl)	# Td4[s0>>16]
	llgc	$t2,2048($t2,$tbl)	# Td4[s0>>8]
	sll	$t1,16
	llgc	$t3,2048($s0,$tbl)	# Td4[s0>>0]
	sllg	$s0,$i3,24
	sll	$t2,8

	llgc	$s1,2048($s1,$tbl)	# Td4[s1>>0]
	llgc	$i1,2048($i1,$tbl)	# Td4[s1>>24]
	llgc	$i2,2048($i2,$tbl)	# Td4[s1>>16]
	sll	$i1,24
	llgc	$i3,2048($ra,$tbl)	# Td4[s1>>8]
	sll	$i2,16
	sll	$i3,8
	or	$s0,$s1
	or	$t1,$i1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,8	# i0
	srlg	$i2,$s2,24
	srlg	$i3,$s2,16
	nr	$s2,$mask	# i1
	nr	$i1,$mask
	nr	$i3,$mask
	llgc	$i1,2048($i1,$tbl)	# Td4[s2>>8]
	llgc	$s1,2048($s2,$tbl)	# Td4[s2>>0]
	llgc	$i2,2048($i2,$tbl)	# Td4[s2>>24]
	llgc	$i3,2048($i3,$tbl)	# Td4[s2>>16]
	sll	$i1,8
	sll	$i2,24
	or	$s0,$i1
	sll	$i3,16
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s3,16	# i0
	srlg	$i2,$s3,8	# i1
	srlg	$i3,$s3,24
	nr	$s3,$mask	# i2
	nr	$i1,$mask
	nr	$i2,$mask

	l${g}	$ra,15*$SIZE_T($sp)
	or	$s1,$t1
	l	$t0,16($key)
	l	$t1,20($key)

	llgc	$i1,2048($i1,$tbl)	# Td4[s3>>16]
	llgc	$i2,2048($i2,$tbl)	# Td4[s3>>8]
	sll	$i1,16
	llgc	$s2,2048($s3,$tbl)	# Td4[s3>>0]
	llgc	$s3,2048($i3,$tbl)	# Td4[s3>>24]
	sll	$i2,8
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$s3,$t3

	# Final AddRoundKey.
	xr	$s0,$t0
	xr	$s1,$t1
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
___

# Key schedule setup. Validates pointers and bit length, then either
# stores the raw key with a km function code (hardware path) or
# expands the full software key schedule at .Lekey_internal.
$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
#		AES_KEY *key) {
.globl	private_AES_set_encrypt_key
.type	private_AES_set_encrypt_key,\@function
.align	16
private_AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
	lghi	$t0,0
	cl${g}r	$inp,$t0
	je	.Lminus1
	cl${g}r	$key,$t0
	je	.Lminus1

	lghi	$t0,128
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,192
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,256
	clr	$bits,$t0
	je	.Lproceed
	lghi	%r2,-2
	br	%r14

.align	16
.Lproceed:
___
$code.=<<___ if (!$softonly);
	# convert bits to km code, [128,192,256]->[18,19,20]
	lhi	%r5,-128
	lhi	%r0,18
	ar	%r5,$bits
	srl	%r5,6
	ar	%r5,%r0

	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security assist
	jz	.Lekey_internal

	lghi	%r0,0	# query capability vector
	la	%r1,16($sp)
	.long	0xb92f0042	# kmc %r4,%r2

	llihh	%r1,0x8000
	srlg	%r1,%r1,0(%r5)
	ng	%r1,16($sp)
	jz	.Lekey_internal

	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
	stmg	%r0,%r1,0($key)
	lhi	%r0,192
	cr	$bits,%r0
	jl	1f
	lg	%r1,16($inp)
	stg	%r1,16($key)
	je	1f
	lg	%r1,24($inp)
	stg	%r1,24($key)
1:	st	$bits,236($key)	# save bits [for debugging purposes]
	lgr	$t0,%r5
	st	%r5,240($key)	# save km code
	lghi	%r2,0
	br	%r14
___
$code.=<<___;
# Software key expansion. Te4 bytes are fetched from AES_Te+2048;
# rcon[] sits 256 bytes further in.
.align	16
.Lekey_internal:
	stm${g}	%r4,%r13,4*$SIZE_T($sp)	# all non-volatile regs and $key

	larl	$tbl,AES_Te+2048

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	st	$s0,0($key)
	st	$s1,4($key)
	st	$s2,8($key)
	st	$s3,12($key)
	lghi	$t0,128
	cr	$bits,$t0
	jne	.Lnot128

	llill	$mask,0xff
	lghi	$t3,0	# i=0
	lghi	$rounds,10
	st	$rounds,240($key)

	llgfr	$t2,$s3	# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t2,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L128_loop:
	la	$t2,0($t2,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t2,2,0($t2)	# Te4[rk[3]>>0]<<8
	icm	$t2,4,0($i1)	# Te4[rk[3]>>8]<<16
	icm	$t2,8,0($i2)	# Te4[rk[3]>>16]<<24
	icm	$t2,1,0($i3)	# Te4[rk[3]>>24]
	x	$t2,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t2	# rk[4]=rk[0]^...
	xr	$s1,$s0	# rk[5]=rk[1]^rk[4]
	xr	$s2,$s1	# rk[6]=rk[2]^rk[5]
	xr	$s3,$s2	# rk[7]=rk[3]^rk[6]

	llgfr	$t2,$s3	# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	nr	$t2,$mask
	nr	$i1,$mask
	srlg	$i3,$s3,24
	nr	$i2,$mask

	st	$s0,16($key)
	st	$s1,20($key)
	st	$s2,24($key)
	st	$s3,28($key)
	la	$key,16($key)	# key+=4
	la	$t3,4($t3)	# i++
	brct	$rounds,.L128_loop
	lghi	$t0,10
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

# 192-bit key expansion: 8 iterations producing 6 words each.
.align	16
.Lnot128:
	llgf	$t0,16($inp)
	llgf	$t1,20($inp)
	st	$t0,16($key)
	st	$t1,20($key)
	lghi	$t0,192
	cr	$bits,$t0
	jne	.Lnot192

	llill	$mask,0xff
	lghi	$t3,0	# i=0
	lghi	$rounds,12
	st	$rounds,240($key)
	lghi	$rounds,8

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L192_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)	# Te4[rk[5]>>0]<<8
	icm	$t1,4,0($i1)	# Te4[rk[5]>>8]<<16
	icm	$t1,8,0($i2)	# Te4[rk[5]>>16]<<24
	icm	$t1,1,0($i3)	# Te4[rk[5]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1	# rk[6]=rk[0]^...
	xr	$s1,$s0	# rk[7]=rk[1]^rk[6]
	xr	$s2,$s1	# rk[8]=rk[2]^rk[7]
	xr	$s3,$s2	# rk[9]=rk[3]^rk[8]

	st	$s0,24($key)
	st	$s1,28($key)
	st	$s2,32($key)
	st	$s3,36($key)
	brct	$rounds,.L192_continue
	lghi	$t0,12
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L192_continue:
	lgr	$t1,$s3
	x	$t1,16($key)	# rk[10]=rk[4]^rk[9]
	st	$t1,40($key)
	x	$t1,20($key)	# rk[11]=rk[5]^rk[10]
	st	$t1,44($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,24($key)	# key+=6
	la	$t3,4($t3)	# i++
	j	.L192_loop

# 256-bit key expansion: 7 iterations producing 8 words each.
.align	16
.Lnot192:
	llgf	$t0,24($inp)
	llgf	$t1,28($inp)
	st	$t0,24($key)
	st	$t1,28($key)
	llill	$mask,0xff
	lghi	$t3,0	# i=0
	lghi	$rounds,14
	st	$rounds,240($key)
	lghi	$rounds,7

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L256_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)	# Te4[rk[7]>>0]<<8
	icm	$t1,4,0($i1)	# Te4[rk[7]>>8]<<16
	icm	$t1,8,0($i2)	# Te4[rk[7]>>16]<<24
	icm	$t1,1,0($i3)	# Te4[rk[7]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1	# rk[8]=rk[0]^...
	xr	$s1,$s0	# rk[9]=rk[1]^rk[8]
	xr	$s2,$s1	# rk[10]=rk[2]^rk[9]
	xr	$s3,$s2	# rk[11]=rk[3]^rk[10]
	st	$s0,32($key)
	st	$s1,36($key)
	st	$s2,40($key)
	st	$s3,44($key)
	brct	$rounds,.L256_continue
	lghi	$t0,14
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L256_continue:
	lgr	$t1,$s3	# temp=rk[11]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	llgc	$t1,0($t1)	# Te4[rk[11]>>0]
	icm	$t1,2,0($i1)	# Te4[rk[11]>>8]<<8
	icm	$t1,4,0($i2)	# Te4[rk[11]>>16]<<16
	icm	$t1,8,0($i3)	# Te4[rk[11]>>24]<<24
	x	$t1,16($key)	# rk[12]=rk[4]^...
	st	$t1,48($key)
	x	$t1,20($key)	# rk[13]=rk[5]^rk[12]
	st	$t1,52($key)
	x	$t1,24($key)	# rk[14]=rk[6]^rk[13]
	st	$t1,56($key)
	x	$t1,28($key)	# rk[15]=rk[7]^rk[14]
	st	$t1,60($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,32($key)	# key+=8
	la	$t3,4($t3)	# i++
	j	.L256_loop

.Lminus1:
	lghi	%r2,-1
	br	$ra
.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key

# void AES_set_decrypt_key(const unsigned char *in, int bits,
#		AES_KEY *key) {
# Builds the encrypt schedule first, then (software path) reverses the
# round-key order and applies InvMixColumns to the interior keys.
.globl	private_AES_set_decrypt_key
.type	private_AES_set_decrypt_key,\@function
.align	16
private_AES_set_decrypt_key:
	#st${g}	$key,4*$SIZE_T($sp)	# I rely on AES_set_encrypt_key to
	st${g}	$ra,14*$SIZE_T($sp)	# save non-volatile registers and $key!
	bras	$ra,_s390x_AES_set_encrypt_key
	#l${g}	$key,4*$SIZE_T($sp)
	l${g}	$ra,14*$SIZE_T($sp)
	ltgr	%r2,%r2
	bnzr	$ra
___
$code.=<<___ if (!$softonly);
	#l	$t0,240($key)
	lhi	$t1,16
	cr	$t0,$t1
	jl	.Lgo
	oill	$t0,0x80	# set "decrypt" bit
	st	$t0,240($key)
	br	$ra
___
$code.=<<___;
# .Linv swaps round keys pairwise front-to-back to reverse their order.
.align	16
.Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
	la	$i1,0($key)
	sllg	$i2,$rounds,4
	la	$i2,0($i2,$key)
	srl	$rounds,1
	lghi	$t1,-16

.align	16
.Linv:	lmg	$s0,$s1,0($i1)
	lmg	$s2,$s3,0($i2)
	stmg	$s0,$s1,0($i2)
	stmg	$s2,$s3,0($i1)
	la	$i1,16($i1)
	la	$i2,0($t1,$i2)
	brct	$rounds,.Linv
___
# Reuse scratch registers as GF(2^8) bit masks for the packed
# xtime computation below.
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
	llgf	$rounds,240($key)
	aghi	$rounds,-1
	sll	$rounds,2	# (rounds-1)*4
	llilh	$mask80,0x8080
	llilh	$mask1b,0x1b1b
	llilh	$maskfe,0xfefe
	oill	$mask80,0x8080
	oill	$mask1b,0x1b1b
	oill	$maskfe,0xfefe

# .Lmix applies InvMixColumns to each interior round-key word: tp2,
# tp4, tp8 are successive xtime() doublings of tp1, combined with
# rotates into the inverse column transform.
.align	16
.Lmix:	l	$s0,16($key)	# tp1
	lr	$s1,$s0
	ngr	$s1,$mask80
	srlg	$t1,$s1,7
	slr	$s1,$t1
	nr	$s1,$mask1b
	sllg	$t1,$s0,1
	nr	$t1,$maskfe
	xr	$s1,$t1	# tp2

	lr	$s2,$s1
	ngr	$s2,$mask80
	srlg	$t1,$s2,7
	slr	$s2,$t1
	nr	$s2,$mask1b
	sllg	$t1,$s1,1
	nr	$t1,$maskfe
	xr	$s2,$t1	# tp4

	lr	$s3,$s2
	ngr	$s3,$mask80
	srlg	$t1,$s3,7
	slr	$s3,$t1
	nr	$s3,$mask1b
	sllg	$t1,$s2,1
	nr	$t1,$maskfe
	xr	$s3,$t1	# tp8

	xr	$s1,$s0	# tp2^tp1
	xr	$s2,$s0	# tp4^tp1
	rll	$s0,$s0,24	# = ROTATE(tp1,8)
	xr	$s2,$s3	# ^=tp8
	xr	$s0,$s1	# ^=tp2^tp1
	xr	$s1,$s3	# tp2^tp1^tp8
	xr	$s0,$s2	# ^=tp4^tp1^tp8
	rll	$s1,$s1,8
	rll	$s2,$s2,16
	xr	$s0,$s1	# ^= ROTATE(tp8^tp2^tp1,24)
	rll	$s3,$s3,24
	xr	$s0,$s2	# ^= ROTATE(tp8^tp4^tp1,16)
	xr	$s0,$s3	# ^= ROTATE(tp8,8)

	st	$s0,16($key)
	la	$key,4($key)
1168 brct $rounds,.Lmix 1169 1170 lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key! 1171 lghi %r2,0 1172 br $ra 1173 .size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key 1174 ___ 1175 1176 ######################################################################## 1177 # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, 1178 # size_t length, const AES_KEY *key, 1179 # unsigned char *ivec, const int enc) 1180 { 1181 my $inp="%r2"; 1182 my $out="%r4"; # length and out are swapped 1183 my $len="%r3"; 1184 my $key="%r5"; 1185 my $ivp="%r6"; 1186 1187 $code.=<<___; 1188 .globl AES_cbc_encrypt 1189 .type AES_cbc_encrypt,\@function 1190 .align 16 1191 AES_cbc_encrypt: 1192 xgr %r3,%r4 # flip %r3 and %r4, out and len 1193 xgr %r4,%r3 1194 xgr %r3,%r4 1195 ___ 1196 $code.=<<___ if (!$softonly); 1197 lhi %r0,16 1198 cl %r0,240($key) 1199 jh .Lcbc_software 1200 1201 lg %r0,0($ivp) # copy ivec 1202 lg %r1,8($ivp) 1203 stmg %r0,%r1,16($sp) 1204 lmg %r0,%r1,0($key) # copy key, cover 256 bit 1205 stmg %r0,%r1,32($sp) 1206 lmg %r0,%r1,16($key) 1207 stmg %r0,%r1,48($sp) 1208 l %r0,240($key) # load kmc code 1209 lghi $key,15 # res=len%16, len-=res; 1210 ngr $key,$len 1211 sl${g}r $len,$key 1212 la %r1,16($sp) # parameter block - ivec || key 1213 jz .Lkmc_truncated 1214 .long 0xb92f0042 # kmc %r4,%r2 1215 brc 1,.-4 # pay attention to "partial completion" 1216 ltr $key,$key 1217 jnz .Lkmc_truncated 1218 .Lkmc_done: 1219 lmg %r0,%r1,16($sp) # copy ivec to caller 1220 stg %r0,0($ivp) 1221 stg %r1,8($ivp) 1222 br $ra 1223 .align 16 1224 .Lkmc_truncated: 1225 ahi $key,-1 # it's the way it's encoded in mvc 1226 tmll %r0,0x80 1227 jnz .Lkmc_truncated_dec 1228 lghi %r1,0 1229 stg %r1,16*$SIZE_T($sp) 1230 stg %r1,16*$SIZE_T+8($sp) 1231 bras %r1,1f 1232 mvc 16*$SIZE_T(1,$sp),0($inp) 1233 1: ex $key,0(%r1) 1234 la %r1,16($sp) # restore parameter block 1235 la $inp,16*$SIZE_T($sp) 1236 lghi $len,16 1237 .long 0xb92f0042 # kmc %r4,%r2 1238 j .Lkmc_done 
1239 .align 16 1240 .Lkmc_truncated_dec: 1241 st${g} $out,4*$SIZE_T($sp) 1242 la $out,16*$SIZE_T($sp) 1243 lghi $len,16 1244 .long 0xb92f0042 # kmc %r4,%r2 1245 l${g} $out,4*$SIZE_T($sp) 1246 bras %r1,2f 1247 mvc 0(1,$out),16*$SIZE_T($sp) 1248 2: ex $key,0(%r1) 1249 j .Lkmc_done 1250 .align 16 1251 .Lcbc_software: 1252 ___ 1253 $code.=<<___; 1254 stm${g} $key,$ra,5*$SIZE_T($sp) 1255 lhi %r0,0 1256 cl %r0,`$stdframe+$SIZE_T-4`($sp) 1257 je .Lcbc_decrypt 1258 1259 larl $tbl,AES_Te 1260 1261 llgf $s0,0($ivp) 1262 llgf $s1,4($ivp) 1263 llgf $s2,8($ivp) 1264 llgf $s3,12($ivp) 1265 1266 lghi $t0,16 1267 sl${g}r $len,$t0 1268 brc 4,.Lcbc_enc_tail # if borrow 1269 .Lcbc_enc_loop: 1270 stm${g} $inp,$out,2*$SIZE_T($sp) 1271 x $s0,0($inp) 1272 x $s1,4($inp) 1273 x $s2,8($inp) 1274 x $s3,12($inp) 1275 lgr %r4,$key 1276 1277 bras $ra,_s390x_AES_encrypt 1278 1279 lm${g} $inp,$key,2*$SIZE_T($sp) 1280 st $s0,0($out) 1281 st $s1,4($out) 1282 st $s2,8($out) 1283 st $s3,12($out) 1284 1285 la $inp,16($inp) 1286 la $out,16($out) 1287 lghi $t0,16 1288 lt${g}r $len,$len 1289 jz .Lcbc_enc_done 1290 sl${g}r $len,$t0 1291 brc 4,.Lcbc_enc_tail # if borrow 1292 j .Lcbc_enc_loop 1293 .align 16 1294 .Lcbc_enc_done: 1295 l${g} $ivp,6*$SIZE_T($sp) 1296 st $s0,0($ivp) 1297 st $s1,4($ivp) 1298 st $s2,8($ivp) 1299 st $s3,12($ivp) 1300 1301 lm${g} %r7,$ra,7*$SIZE_T($sp) 1302 br $ra 1303 1304 .align 16 1305 .Lcbc_enc_tail: 1306 aghi $len,15 1307 lghi $t0,0 1308 stg $t0,16*$SIZE_T($sp) 1309 stg $t0,16*$SIZE_T+8($sp) 1310 bras $t1,3f 1311 mvc 16*$SIZE_T(1,$sp),0($inp) 1312 3: ex $len,0($t1) 1313 lghi $len,0 1314 la $inp,16*$SIZE_T($sp) 1315 j .Lcbc_enc_loop 1316 1317 .align 16 1318 .Lcbc_decrypt: 1319 larl $tbl,AES_Td 1320 1321 lg $t0,0($ivp) 1322 lg $t1,8($ivp) 1323 stmg $t0,$t1,16*$SIZE_T($sp) 1324 1325 .Lcbc_dec_loop: 1326 stm${g} $inp,$out,2*$SIZE_T($sp) 1327 llgf $s0,0($inp) 1328 llgf $s1,4($inp) 1329 llgf $s2,8($inp) 1330 llgf $s3,12($inp) 1331 lgr %r4,$key 1332 1333 bras $ra,_s390x_AES_decrypt 
1334 1335 lm${g} $inp,$key,2*$SIZE_T($sp) 1336 sllg $s0,$s0,32 1337 sllg $s2,$s2,32 1338 lr $s0,$s1 1339 lr $s2,$s3 1340 1341 lg $t0,0($inp) 1342 lg $t1,8($inp) 1343 xg $s0,16*$SIZE_T($sp) 1344 xg $s2,16*$SIZE_T+8($sp) 1345 lghi $s1,16 1346 sl${g}r $len,$s1 1347 brc 4,.Lcbc_dec_tail # if borrow 1348 brc 2,.Lcbc_dec_done # if zero 1349 stg $s0,0($out) 1350 stg $s2,8($out) 1351 stmg $t0,$t1,16*$SIZE_T($sp) 1352 1353 la $inp,16($inp) 1354 la $out,16($out) 1355 j .Lcbc_dec_loop 1356 1357 .Lcbc_dec_done: 1358 stg $s0,0($out) 1359 stg $s2,8($out) 1360 .Lcbc_dec_exit: 1361 lm${g} %r6,$ra,6*$SIZE_T($sp) 1362 stmg $t0,$t1,0($ivp) 1363 1364 br $ra 1365 1366 .align 16 1367 .Lcbc_dec_tail: 1368 aghi $len,15 1369 stg $s0,16*$SIZE_T($sp) 1370 stg $s2,16*$SIZE_T+8($sp) 1371 bras $s1,4f 1372 mvc 0(1,$out),16*$SIZE_T($sp) 1373 4: ex $len,0($s1) 1374 j .Lcbc_dec_exit 1375 .size AES_cbc_encrypt,.-AES_cbc_encrypt 1376 ___ 1377 } 1378 ######################################################################## 1379 # void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, 1380 # size_t blocks, const AES_KEY *key, 1381 # const unsigned char *ivec) 1382 { 1383 my $inp="%r2"; 1384 my $out="%r4"; # blocks and out are swapped 1385 my $len="%r3"; 1386 my $key="%r5"; my $iv0="%r5"; 1387 my $ivp="%r6"; 1388 my $fp ="%r7"; 1389 1390 $code.=<<___; 1391 .globl AES_ctr32_encrypt 1392 .type AES_ctr32_encrypt,\@function 1393 .align 16 1394 AES_ctr32_encrypt: 1395 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 1396 xgr %r4,%r3 1397 xgr %r3,%r4 1398 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case 1399 ___ 1400 $code.=<<___ if (!$softonly); 1401 l %r0,240($key) 1402 lhi %r1,16 1403 clr %r0,%r1 1404 jl .Lctr32_software 1405 1406 stm${g} %r6,$s3,6*$SIZE_T($sp) 1407 1408 slgr $out,$inp 1409 la %r1,0($key) # %r1 is permanent copy of $key 1410 lg $iv0,0($ivp) # load ivec 1411 lg $ivp,8($ivp) 1412 1413 # prepare and allocate stack frame at the top of 4K page 1414 # with 1K reserved for 
eventual signal handling 1415 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer 1416 lghi $s1,-4096 1417 algr $s0,$sp 1418 lgr $fp,$sp 1419 ngr $s0,$s1 # align at page boundary 1420 slgr $fp,$s0 # total buffer size 1421 lgr $s2,$sp 1422 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility 1423 slgr $fp,$s1 # deduct reservation to get usable buffer size 1424 # buffer size is at lest 256 and at most 3072+256-16 1425 1426 la $sp,1024($s0) # alloca 1427 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16 1428 st${g} $s2,0($sp) # back-chain 1429 st${g} $fp,$SIZE_T($sp) 1430 1431 slgr $len,$fp 1432 brc 1,.Lctr32_hw_switch # not zero, no borrow 1433 algr $fp,$len # input is shorter than allocated buffer 1434 lghi $len,0 1435 st${g} $fp,$SIZE_T($sp) 1436 1437 .Lctr32_hw_switch: 1438 ___ 1439 $code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower 1440 larl $s0,OPENSSL_s390xcap_P 1441 lg $s0,8($s0) 1442 tmhh $s0,0x0004 # check for message_security-assist-4 1443 jz .Lctr32_km_loop 1444 1445 llgfr $s0,%r0 1446 lgr $s1,%r1 1447 lghi %r0,0 1448 la %r1,16($sp) 1449 .long 0xb92d2042 # kmctr %r4,%r2,%r2 1450 1451 llihh %r0,0x8000 # check if kmctr supports the function code 1452 srlg %r0,%r0,0($s0) 1453 ng %r0,16($sp) 1454 lgr %r0,$s0 1455 lgr %r1,$s1 1456 jz .Lctr32_km_loop 1457 1458 ####### kmctr code 1459 algr $out,$inp # restore $out 1460 lgr $s1,$len # $s1 undertakes $len 1461 j .Lctr32_kmctr_loop 1462 .align 16 1463 .Lctr32_kmctr_loop: 1464 la $s2,16($sp) 1465 lgr $s3,$fp 1466 .Lctr32_kmctr_prepare: 1467 stg $iv0,0($s2) 1468 stg $ivp,8($s2) 1469 la $s2,16($s2) 1470 ahi $ivp,1 # 32-bit increment, preserves upper half 1471 brct $s3,.Lctr32_kmctr_prepare 1472 1473 #la $inp,0($inp) # inp 1474 sllg $len,$fp,4 # len 1475 #la $out,0($out) # out 1476 la $s2,16($sp) # iv 1477 .long 0xb92da042 # kmctr $out,$s2,$inp 1478 brc 1,.-4 # pay attention to "partial completion" 1479 1480 slgr $s1,$fp 1481 brc 1,.Lctr32_kmctr_loop # not zero, no borrow 1482 algr 
$fp,$s1 1483 lghi $s1,0 1484 brc 4+1,.Lctr32_kmctr_loop # not zero 1485 1486 l${g} $sp,0($sp) 1487 lm${g} %r6,$s3,6*$SIZE_T($sp) 1488 br $ra 1489 .align 16 1490 ___ 1491 $code.=<<___; 1492 .Lctr32_km_loop: 1493 la $s2,16($sp) 1494 lgr $s3,$fp 1495 .Lctr32_km_prepare: 1496 stg $iv0,0($s2) 1497 stg $ivp,8($s2) 1498 la $s2,16($s2) 1499 ahi $ivp,1 # 32-bit increment, preserves upper half 1500 brct $s3,.Lctr32_km_prepare 1501 1502 la $s0,16($sp) # inp 1503 sllg $s1,$fp,4 # len 1504 la $s2,16($sp) # out 1505 .long 0xb92e00a8 # km %r10,%r8 1506 brc 1,.-4 # pay attention to "partial completion" 1507 1508 la $s2,16($sp) 1509 lgr $s3,$fp 1510 slgr $s2,$inp 1511 .Lctr32_km_xor: 1512 lg $s0,0($inp) 1513 lg $s1,8($inp) 1514 xg $s0,0($s2,$inp) 1515 xg $s1,8($s2,$inp) 1516 stg $s0,0($out,$inp) 1517 stg $s1,8($out,$inp) 1518 la $inp,16($inp) 1519 brct $s3,.Lctr32_km_xor 1520 1521 slgr $len,$fp 1522 brc 1,.Lctr32_km_loop # not zero, no borrow 1523 algr $fp,$len 1524 lghi $len,0 1525 brc 4+1,.Lctr32_km_loop # not zero 1526 1527 l${g} $s0,0($sp) 1528 l${g} $s1,$SIZE_T($sp) 1529 la $s2,16($sp) 1530 .Lctr32_km_zap: 1531 stg $s0,0($s2) 1532 stg $s0,8($s2) 1533 la $s2,16($s2) 1534 brct $s1,.Lctr32_km_zap 1535 1536 la $sp,0($s0) 1537 lm${g} %r6,$s3,6*$SIZE_T($sp) 1538 br $ra 1539 .align 16 1540 .Lctr32_software: 1541 ___ 1542 $code.=<<___; 1543 stm${g} $key,$ra,5*$SIZE_T($sp) 1544 sl${g}r $inp,$out 1545 larl $tbl,AES_Te 1546 llgf $t1,12($ivp) 1547 1548 .Lctr32_loop: 1549 stm${g} $inp,$out,2*$SIZE_T($sp) 1550 llgf $s0,0($ivp) 1551 llgf $s1,4($ivp) 1552 llgf $s2,8($ivp) 1553 lgr $s3,$t1 1554 st $t1,16*$SIZE_T($sp) 1555 lgr %r4,$key 1556 1557 bras $ra,_s390x_AES_encrypt 1558 1559 lm${g} $inp,$ivp,2*$SIZE_T($sp) 1560 llgf $t1,16*$SIZE_T($sp) 1561 x $s0,0($inp,$out) 1562 x $s1,4($inp,$out) 1563 x $s2,8($inp,$out) 1564 x $s3,12($inp,$out) 1565 stm $s0,$s3,0($out) 1566 1567 la $out,16($out) 1568 ahi $t1,1 # 32-bit increment 1569 brct $len,.Lctr32_loop 1570 1571 lm${g} %r6,$ra,6*$SIZE_T($sp) 1572 
br $ra 1573 .size AES_ctr32_encrypt,.-AES_ctr32_encrypt 1574 ___ 1575 } 1576 1577 ######################################################################## 1578 # void AES_xts_encrypt(const char *inp,char *out,size_t len, 1579 # const AES_KEY *key1, const AES_KEY *key2, 1580 # const unsigned char iv[16]); 1581 # 1582 { 1583 my $inp="%r2"; 1584 my $out="%r4"; # len and out are swapped 1585 my $len="%r3"; 1586 my $key1="%r5"; # $i1 1587 my $key2="%r6"; # $i2 1588 my $fp="%r7"; # $i3 1589 my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame... 1590 1591 $code.=<<___; 1592 .type _s390x_xts_km,\@function 1593 .align 16 1594 _s390x_xts_km: 1595 ___ 1596 $code.=<<___ if(1); 1597 llgfr $s0,%r0 # put aside the function code 1598 lghi $s1,0x7f 1599 nr $s1,%r0 1600 lghi %r0,0 # query capability vector 1601 la %r1,2*$SIZE_T($sp) 1602 .long 0xb92e0042 # km %r4,%r2 1603 llihh %r1,0x8000 1604 srlg %r1,%r1,32($s1) # check for 32+function code 1605 ng %r1,2*$SIZE_T($sp) 1606 lgr %r0,$s0 # restore the function code 1607 la %r1,0($key1) # restore $key1 1608 jz .Lxts_km_vanilla 1609 1610 lmg $i2,$i3,$tweak($sp) # put aside the tweak value 1611 algr $out,$inp 1612 1613 oill %r0,32 # switch to xts function code 1614 aghi $s1,-18 # 1615 sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16 1616 la %r1,$tweak-16($sp) 1617 slgr %r1,$s1 # parameter block position 1618 lmg $s0,$s3,0($key1) # load 256 bits of key material, 1619 stmg $s0,$s3,0(%r1) # and copy it to parameter block. 1620 # yes, it contains junk and overlaps 1621 # with the tweak in 128-bit case. 1622 # it's done to avoid conditional 1623 # branch. 
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value

	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4	# pay attention to "partial completion"

	lrvg	$s0,$tweak+0($sp)	# load the last tweak
	lrvg	$s1,$tweak+8($sp)
	stmg	%r0,%r3,$tweak-32(%r1)	# wipe copy of the key

	nill	%r0,0xffdf	# switch back to original function code
	la	%r1,0($key1)	# restore pointer to $key1
	slgr	$out,$inp

	llgc	$len,2*$SIZE_T-1($sp)
	nill	$len,0x0f	# $len%=16
	br	$ra

.align	16
.Lxts_km_vanilla:
___
$code.=<<___;
	# Software-driven km path: the CPU lacks the dedicated XTS function
	# code, so tweaks are computed here and only the block cipher is
	# delegated to km.
	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1	# align at page boundary
	slgr	$fp,$s0	# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1	# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	$sp,1024($s0)	# alloca
	nill	$fp,0xfff0	# round to 16*n
	st${g}	$s2,0($sp)	# back-chain
	nill	$len,0xfff0	# redundant
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lxts_km_go	# not zero, no borrow
	algr	$fp,$len	# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)

.Lxts_km_go:
	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
	lrvg	$s1,$tweak+8($s2)

	la	$s2,16($sp)	# vector of ascending tweak values
	slgr	$s2,$inp
	srlg	$s3,$fp,4
	j	.Lxts_km_start

.Lxts_km_loop:
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_prepare:
	# multiply the 128-bit tweak by x in GF(2^128), reduction
	# polynomial x^128+x^7+x^2+x+1 (0x87), as per IEEE P1619
	lghi	$i1,0x87
	srag	$i2,$s1,63	# broadcast upper bit
	ngr	$i1,$i2	# rem
	srlg	$i2,$s0,63	# carry bit from lower half
	sllg	$s0,$s0,1
	sllg	$s1,$s1,1
	xgr	$s0,$i1
	ogr	$s1,$i2
1692 .Lxts_km_start: 1693 lrvgr $i1,$s0 # flip byte order 1694 lrvgr $i2,$s1 1695 stg $i1,0($s2,$inp) 1696 stg $i2,8($s2,$inp) 1697 xg $i1,0($inp) 1698 xg $i2,8($inp) 1699 stg $i1,0($out,$inp) 1700 stg $i2,8($out,$inp) 1701 la $inp,16($inp) 1702 brct $s3,.Lxts_km_prepare 1703 1704 slgr $inp,$fp # rewind $inp 1705 la $s2,0($out,$inp) 1706 lgr $s3,$fp 1707 .long 0xb92e00aa # km $s2,$s2 1708 brc 1,.-4 # pay attention to "partial completion" 1709 1710 la $s2,16($sp) 1711 slgr $s2,$inp 1712 srlg $s3,$fp,4 1713 .Lxts_km_xor: 1714 lg $i1,0($out,$inp) 1715 lg $i2,8($out,$inp) 1716 xg $i1,0($s2,$inp) 1717 xg $i2,8($s2,$inp) 1718 stg $i1,0($out,$inp) 1719 stg $i2,8($out,$inp) 1720 la $inp,16($inp) 1721 brct $s3,.Lxts_km_xor 1722 1723 slgr $len,$fp 1724 brc 1,.Lxts_km_loop # not zero, no borrow 1725 algr $fp,$len 1726 lghi $len,0 1727 brc 4+1,.Lxts_km_loop # not zero 1728 1729 l${g} $i1,0($sp) # back-chain 1730 llgf $fp,`2*$SIZE_T-4`($sp) # bytes used 1731 la $i2,16($sp) 1732 srlg $fp,$fp,4 1733 .Lxts_km_zap: 1734 stg $i1,0($i2) 1735 stg $i1,8($i2) 1736 la $i2,16($i2) 1737 brct $fp,.Lxts_km_zap 1738 1739 la $sp,0($i1) 1740 llgc $len,2*$SIZE_T-1($i1) 1741 nill $len,0x0f # $len%=16 1742 bzr $ra 1743 1744 # generate one more tweak... 
1745 lghi $i1,0x87 1746 srag $i2,$s1,63 # broadcast upper bit 1747 ngr $i1,$i2 # rem 1748 srlg $i2,$s0,63 # carry bit from lower half 1749 sllg $s0,$s0,1 1750 sllg $s1,$s1,1 1751 xgr $s0,$i1 1752 ogr $s1,$i2 1753 1754 ltr $len,$len # clear zero flag 1755 br $ra 1756 .size _s390x_xts_km,.-_s390x_xts_km 1757 1758 .globl AES_xts_encrypt 1759 .type AES_xts_encrypt,\@function 1760 .align 16 1761 AES_xts_encrypt: 1762 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 1763 xgr %r4,%r3 1764 xgr %r3,%r4 1765 ___ 1766 $code.=<<___ if ($SIZE_T==4); 1767 llgfr $len,$len 1768 ___ 1769 $code.=<<___; 1770 st${g} $len,1*$SIZE_T($sp) # save copy of $len 1771 srag $len,$len,4 # formally wrong, because it expands 1772 # sign byte, but who can afford asking 1773 # to process more than 2^63-1 bytes? 1774 # I use it, because it sets condition 1775 # code... 1776 bcr 8,$ra # abort if zero (i.e. less than 16) 1777 ___ 1778 $code.=<<___ if (!$softonly); 1779 llgf %r0,240($key2) 1780 lhi %r1,16 1781 clr %r0,%r1 1782 jl .Lxts_enc_software 1783 1784 stm${g} %r6,$s3,6*$SIZE_T($sp) 1785 st${g} $ra,14*$SIZE_T($sp) 1786 1787 sllg $len,$len,4 # $len&=~15 1788 slgr $out,$inp 1789 1790 # generate the tweak value 1791 l${g} $s3,$stdframe($sp) # pointer to iv 1792 la $s2,$tweak($sp) 1793 lmg $s0,$s1,0($s3) 1794 lghi $s3,16 1795 stmg $s0,$s1,0($s2) 1796 la %r1,0($key2) # $key2 is not needed anymore 1797 .long 0xb92e00aa # km $s2,$s2, generate the tweak 1798 brc 1,.-4 # can this happen? 
1799 1800 l %r0,240($key1) 1801 la %r1,0($key1) # $key1 is not needed anymore 1802 bras $ra,_s390x_xts_km 1803 jz .Lxts_enc_km_done 1804 1805 aghi $inp,-16 # take one step back 1806 la $i3,0($out,$inp) # put aside real $out 1807 .Lxts_enc_km_steal: 1808 llgc $i1,16($inp) 1809 llgc $i2,0($out,$inp) 1810 stc $i1,0($out,$inp) 1811 stc $i2,16($out,$inp) 1812 la $inp,1($inp) 1813 brct $len,.Lxts_enc_km_steal 1814 1815 la $s2,0($i3) 1816 lghi $s3,16 1817 lrvgr $i1,$s0 # flip byte order 1818 lrvgr $i2,$s1 1819 xg $i1,0($s2) 1820 xg $i2,8($s2) 1821 stg $i1,0($s2) 1822 stg $i2,8($s2) 1823 .long 0xb92e00aa # km $s2,$s2 1824 brc 1,.-4 # can this happen? 1825 lrvgr $i1,$s0 # flip byte order 1826 lrvgr $i2,$s1 1827 xg $i1,0($i3) 1828 xg $i2,8($i3) 1829 stg $i1,0($i3) 1830 stg $i2,8($i3) 1831 1832 .Lxts_enc_km_done: 1833 l${g} $ra,14*$SIZE_T($sp) 1834 st${g} $sp,$tweak($sp) # wipe tweak 1835 st${g} $sp,$tweak($sp) 1836 lm${g} %r6,$s3,6*$SIZE_T($sp) 1837 br $ra 1838 .align 16 1839 .Lxts_enc_software: 1840 ___ 1841 $code.=<<___; 1842 stm${g} %r6,$ra,6*$SIZE_T($sp) 1843 1844 slgr $out,$inp 1845 1846 xgr $s0,$s0 # clear upper half 1847 xgr $s1,$s1 1848 lrv $s0,$stdframe+4($sp) # load secno 1849 lrv $s1,$stdframe+0($sp) 1850 xgr $s2,$s2 1851 xgr $s3,$s3 1852 stm${g} %r2,%r5,2*$SIZE_T($sp) 1853 la $key,0($key2) 1854 larl $tbl,AES_Te 1855 bras $ra,_s390x_AES_encrypt # generate the tweak 1856 lm${g} %r2,%r5,2*$SIZE_T($sp) 1857 stm $s0,$s3,$tweak($sp) # save the tweak 1858 j .Lxts_enc_enter 1859 1860 .align 16 1861 .Lxts_enc_loop: 1862 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 1863 lrvg $s3,$tweak+8($sp) 1864 lghi %r1,0x87 1865 srag %r0,$s3,63 # broadcast upper bit 1866 ngr %r1,%r0 # rem 1867 srlg %r0,$s1,63 # carry bit from lower half 1868 sllg $s1,$s1,1 1869 sllg $s3,$s3,1 1870 xgr $s1,%r1 1871 ogr $s3,%r0 1872 lrvgr $s1,$s1 # flip byte order 1873 lrvgr $s3,$s3 1874 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1875 stg $s1,$tweak+0($sp) # save the tweak 1876 llgfr 
$s1,$s1 1877 srlg $s2,$s3,32 1878 stg $s3,$tweak+8($sp) 1879 llgfr $s3,$s3 1880 la $inp,16($inp) # $inp+=16 1881 .Lxts_enc_enter: 1882 x $s0,0($inp) # ^=*($inp) 1883 x $s1,4($inp) 1884 x $s2,8($inp) 1885 x $s3,12($inp) 1886 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing 1887 la $key,0($key1) 1888 bras $ra,_s390x_AES_encrypt 1889 lm${g} %r2,%r5,2*$SIZE_T($sp) 1890 x $s0,$tweak+0($sp) # ^=tweak 1891 x $s1,$tweak+4($sp) 1892 x $s2,$tweak+8($sp) 1893 x $s3,$tweak+12($sp) 1894 st $s0,0($out,$inp) 1895 st $s1,4($out,$inp) 1896 st $s2,8($out,$inp) 1897 st $s3,12($out,$inp) 1898 brct${g} $len,.Lxts_enc_loop 1899 1900 llgc $len,`2*$SIZE_T-1`($sp) 1901 nill $len,0x0f # $len%16 1902 jz .Lxts_enc_done 1903 1904 la $i3,0($inp,$out) # put aside real $out 1905 .Lxts_enc_steal: 1906 llgc %r0,16($inp) 1907 llgc %r1,0($out,$inp) 1908 stc %r0,0($out,$inp) 1909 stc %r1,16($out,$inp) 1910 la $inp,1($inp) 1911 brct $len,.Lxts_enc_steal 1912 la $out,0($i3) # restore real $out 1913 1914 # generate last tweak... 
1915 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 1916 lrvg $s3,$tweak+8($sp) 1917 lghi %r1,0x87 1918 srag %r0,$s3,63 # broadcast upper bit 1919 ngr %r1,%r0 # rem 1920 srlg %r0,$s1,63 # carry bit from lower half 1921 sllg $s1,$s1,1 1922 sllg $s3,$s3,1 1923 xgr $s1,%r1 1924 ogr $s3,%r0 1925 lrvgr $s1,$s1 # flip byte order 1926 lrvgr $s3,$s3 1927 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1928 stg $s1,$tweak+0($sp) # save the tweak 1929 llgfr $s1,$s1 1930 srlg $s2,$s3,32 1931 stg $s3,$tweak+8($sp) 1932 llgfr $s3,$s3 1933 1934 x $s0,0($out) # ^=*(inp)|stolen cipther-text 1935 x $s1,4($out) 1936 x $s2,8($out) 1937 x $s3,12($out) 1938 st${g} $out,4*$SIZE_T($sp) 1939 la $key,0($key1) 1940 bras $ra,_s390x_AES_encrypt 1941 l${g} $out,4*$SIZE_T($sp) 1942 x $s0,`$tweak+0`($sp) # ^=tweak 1943 x $s1,`$tweak+4`($sp) 1944 x $s2,`$tweak+8`($sp) 1945 x $s3,`$tweak+12`($sp) 1946 st $s0,0($out) 1947 st $s1,4($out) 1948 st $s2,8($out) 1949 st $s3,12($out) 1950 1951 .Lxts_enc_done: 1952 stg $sp,$tweak+0($sp) # wipe tweak 1953 stg $sp,$twesk+8($sp) 1954 lm${g} %r6,$ra,6*$SIZE_T($sp) 1955 br $ra 1956 .size AES_xts_encrypt,.-AES_xts_encrypt 1957 ___ 1958 # void AES_xts_decrypt(const char *inp,char *out,size_t len, 1959 # const AES_KEY *key1, const AES_KEY *key2,u64 secno); 1960 # 1961 $code.=<<___; 1962 .globl AES_xts_decrypt 1963 .type AES_xts_decrypt,\@function 1964 .align 16 1965 AES_xts_decrypt: 1966 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 1967 xgr %r4,%r3 1968 xgr %r3,%r4 1969 ___ 1970 $code.=<<___ if ($SIZE_T==4); 1971 llgfr $len,$len 1972 ___ 1973 $code.=<<___; 1974 st${g} $len,1*$SIZE_T($sp) # save copy of $len 1975 aghi $len,-16 1976 bcr 4,$ra # abort if less than zero. formally 1977 # wrong, because $len is unsigned, 1978 # but who can afford asking to 1979 # process more than 2^63-1 bytes? 
1980 tmll $len,0x0f 1981 jnz .Lxts_dec_proceed 1982 aghi $len,16 1983 .Lxts_dec_proceed: 1984 ___ 1985 $code.=<<___ if (!$softonly); 1986 llgf %r0,240($key2) 1987 lhi %r1,16 1988 clr %r0,%r1 1989 jl .Lxts_dec_software 1990 1991 stm${g} %r6,$s3,6*$SIZE_T($sp) 1992 st${g} $ra,14*$SIZE_T($sp) 1993 1994 nill $len,0xfff0 # $len&=~15 1995 slgr $out,$inp 1996 1997 # generate the tweak value 1998 l${g} $s3,$stdframe($sp) # pointer to iv 1999 la $s2,$tweak($sp) 2000 lmg $s0,$s1,0($s3) 2001 lghi $s3,16 2002 stmg $s0,$s1,0($s2) 2003 la %r1,0($key2) # $key2 is not needed past this point 2004 .long 0xb92e00aa # km $s2,$s2, generate the tweak 2005 brc 1,.-4 # can this happen? 2006 2007 l %r0,240($key1) 2008 la %r1,0($key1) # $key1 is not needed anymore 2009 2010 ltgr $len,$len 2011 jz .Lxts_dec_km_short 2012 bras $ra,_s390x_xts_km 2013 jz .Lxts_dec_km_done 2014 2015 lrvgr $s2,$s0 # make copy in reverse byte order 2016 lrvgr $s3,$s1 2017 j .Lxts_dec_km_2ndtweak 2018 2019 .Lxts_dec_km_short: 2020 llgc $len,`2*$SIZE_T-1`($sp) 2021 nill $len,0x0f # $len%=16 2022 lrvg $s0,$tweak+0($sp) # load the tweak 2023 lrvg $s1,$tweak+8($sp) 2024 lrvgr $s2,$s0 # make copy in reverse byte order 2025 lrvgr $s3,$s1 2026 2027 .Lxts_dec_km_2ndtweak: 2028 lghi $i1,0x87 2029 srag $i2,$s1,63 # broadcast upper bit 2030 ngr $i1,$i2 # rem 2031 srlg $i2,$s0,63 # carry bit from lower half 2032 sllg $s0,$s0,1 2033 sllg $s1,$s1,1 2034 xgr $s0,$i1 2035 ogr $s1,$i2 2036 lrvgr $i1,$s0 # flip byte order 2037 lrvgr $i2,$s1 2038 2039 xg $i1,0($inp) 2040 xg $i2,8($inp) 2041 stg $i1,0($out,$inp) 2042 stg $i2,8($out,$inp) 2043 la $i2,0($out,$inp) 2044 lghi $i3,16 2045 .long 0xb92e0066 # km $i2,$i2 2046 brc 1,.-4 # can this happen? 
2047 lrvgr $i1,$s0 2048 lrvgr $i2,$s1 2049 xg $i1,0($out,$inp) 2050 xg $i2,8($out,$inp) 2051 stg $i1,0($out,$inp) 2052 stg $i2,8($out,$inp) 2053 2054 la $i3,0($out,$inp) # put aside real $out 2055 .Lxts_dec_km_steal: 2056 llgc $i1,16($inp) 2057 llgc $i2,0($out,$inp) 2058 stc $i1,0($out,$inp) 2059 stc $i2,16($out,$inp) 2060 la $inp,1($inp) 2061 brct $len,.Lxts_dec_km_steal 2062 2063 lgr $s0,$s2 2064 lgr $s1,$s3 2065 xg $s0,0($i3) 2066 xg $s1,8($i3) 2067 stg $s0,0($i3) 2068 stg $s1,8($i3) 2069 la $s0,0($i3) 2070 lghi $s1,16 2071 .long 0xb92e0088 # km $s0,$s0 2072 brc 1,.-4 # can this happen? 2073 xg $s2,0($i3) 2074 xg $s3,8($i3) 2075 stg $s2,0($i3) 2076 stg $s3,8($i3) 2077 .Lxts_dec_km_done: 2078 l${g} $ra,14*$SIZE_T($sp) 2079 st${g} $sp,$tweak($sp) # wipe tweak 2080 st${g} $sp,$tweak($sp) 2081 lm${g} %r6,$s3,6*$SIZE_T($sp) 2082 br $ra 2083 .align 16 2084 .Lxts_dec_software: 2085 ___ 2086 $code.=<<___; 2087 stm${g} %r6,$ra,6*$SIZE_T($sp) 2088 2089 srlg $len,$len,4 2090 slgr $out,$inp 2091 2092 xgr $s0,$s0 # clear upper half 2093 xgr $s1,$s1 2094 lrv $s0,$stdframe+4($sp) # load secno 2095 lrv $s1,$stdframe+0($sp) 2096 xgr $s2,$s2 2097 xgr $s3,$s3 2098 stm${g} %r2,%r5,2*$SIZE_T($sp) 2099 la $key,0($key2) 2100 larl $tbl,AES_Te 2101 bras $ra,_s390x_AES_encrypt # generate the tweak 2102 lm${g} %r2,%r5,2*$SIZE_T($sp) 2103 larl $tbl,AES_Td 2104 lt${g}r $len,$len 2105 stm $s0,$s3,$tweak($sp) # save the tweak 2106 jz .Lxts_dec_short 2107 j .Lxts_dec_enter 2108 2109 .align 16 2110 .Lxts_dec_loop: 2111 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2112 lrvg $s3,$tweak+8($sp) 2113 lghi %r1,0x87 2114 srag %r0,$s3,63 # broadcast upper bit 2115 ngr %r1,%r0 # rem 2116 srlg %r0,$s1,63 # carry bit from lower half 2117 sllg $s1,$s1,1 2118 sllg $s3,$s3,1 2119 xgr $s1,%r1 2120 ogr $s3,%r0 2121 lrvgr $s1,$s1 # flip byte order 2122 lrvgr $s3,$s3 2123 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2124 stg $s1,$tweak+0($sp) # save the tweak 2125 llgfr $s1,$s1 2126 srlg $s2,$s3,32 
2127 stg $s3,$tweak+8($sp) 2128 llgfr $s3,$s3 2129 .Lxts_dec_enter: 2130 x $s0,0($inp) # tweak^=*(inp) 2131 x $s1,4($inp) 2132 x $s2,8($inp) 2133 x $s3,12($inp) 2134 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing 2135 la $key,0($key1) 2136 bras $ra,_s390x_AES_decrypt 2137 lm${g} %r2,%r5,2*$SIZE_T($sp) 2138 x $s0,$tweak+0($sp) # ^=tweak 2139 x $s1,$tweak+4($sp) 2140 x $s2,$tweak+8($sp) 2141 x $s3,$tweak+12($sp) 2142 st $s0,0($out,$inp) 2143 st $s1,4($out,$inp) 2144 st $s2,8($out,$inp) 2145 st $s3,12($out,$inp) 2146 la $inp,16($inp) 2147 brct${g} $len,.Lxts_dec_loop 2148 2149 llgc $len,`2*$SIZE_T-1`($sp) 2150 nill $len,0x0f # $len%16 2151 jz .Lxts_dec_done 2152 2153 # generate pair of tweaks... 2154 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2155 lrvg $s3,$tweak+8($sp) 2156 lghi %r1,0x87 2157 srag %r0,$s3,63 # broadcast upper bit 2158 ngr %r1,%r0 # rem 2159 srlg %r0,$s1,63 # carry bit from lower half 2160 sllg $s1,$s1,1 2161 sllg $s3,$s3,1 2162 xgr $s1,%r1 2163 ogr $s3,%r0 2164 lrvgr $i2,$s1 # flip byte order 2165 lrvgr $i3,$s3 2166 stmg $i2,$i3,$tweak($sp) # save the 1st tweak 2167 j .Lxts_dec_2ndtweak 2168 2169 .align 16 2170 .Lxts_dec_short: 2171 llgc $len,`2*$SIZE_T-1`($sp) 2172 nill $len,0x0f # $len%16 2173 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2174 lrvg $s3,$tweak+8($sp) 2175 .Lxts_dec_2ndtweak: 2176 lghi %r1,0x87 2177 srag %r0,$s3,63 # broadcast upper bit 2178 ngr %r1,%r0 # rem 2179 srlg %r0,$s1,63 # carry bit from lower half 2180 sllg $s1,$s1,1 2181 sllg $s3,$s3,1 2182 xgr $s1,%r1 2183 ogr $s3,%r0 2184 lrvgr $s1,$s1 # flip byte order 2185 lrvgr $s3,$s3 2186 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2187 stg $s1,$tweak-16+0($sp) # save the 2nd tweak 2188 llgfr $s1,$s1 2189 srlg $s2,$s3,32 2190 stg $s3,$tweak-16+8($sp) 2191 llgfr $s3,$s3 2192 2193 x $s0,0($inp) # tweak_the_2nd^=*(inp) 2194 x $s1,4($inp) 2195 x $s2,8($inp) 2196 x $s3,12($inp) 2197 stm${g} %r2,%r3,2*$SIZE_T($sp) 2198 la $key,0($key1) 2199 
bras $ra,_s390x_AES_decrypt 2200 lm${g} %r2,%r5,2*$SIZE_T($sp) 2201 x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd 2202 x $s1,$tweak-16+4($sp) 2203 x $s2,$tweak-16+8($sp) 2204 x $s3,$tweak-16+12($sp) 2205 st $s0,0($out,$inp) 2206 st $s1,4($out,$inp) 2207 st $s2,8($out,$inp) 2208 st $s3,12($out,$inp) 2209 2210 la $i3,0($out,$inp) # put aside real $out 2211 .Lxts_dec_steal: 2212 llgc %r0,16($inp) 2213 llgc %r1,0($out,$inp) 2214 stc %r0,0($out,$inp) 2215 stc %r1,16($out,$inp) 2216 la $inp,1($inp) 2217 brct $len,.Lxts_dec_steal 2218 la $out,0($i3) # restore real $out 2219 2220 lm $s0,$s3,$tweak($sp) # load the 1st tweak 2221 x $s0,0($out) # tweak^=*(inp)|stolen cipher-text 2222 x $s1,4($out) 2223 x $s2,8($out) 2224 x $s3,12($out) 2225 st${g} $out,4*$SIZE_T($sp) 2226 la $key,0($key1) 2227 bras $ra,_s390x_AES_decrypt 2228 l${g} $out,4*$SIZE_T($sp) 2229 x $s0,$tweak+0($sp) # ^=tweak 2230 x $s1,$tweak+4($sp) 2231 x $s2,$tweak+8($sp) 2232 x $s3,$tweak+12($sp) 2233 st $s0,0($out) 2234 st $s1,4($out) 2235 st $s2,8($out) 2236 st $s3,12($out) 2237 stg $sp,$tweak-16+0($sp) # wipe 2nd tweak 2238 stg $sp,$tweak-16+8($sp) 2239 .Lxts_dec_done: 2240 stg $sp,$tweak+0($sp) # wipe tweak 2241 stg $sp,$twesk+8($sp) 2242 lm${g} %r6,$ra,6*$SIZE_T($sp) 2243 br $ra 2244 .size AES_xts_decrypt,.-AES_xts_decrypt 2245 ___ 2246 } 2247 $code.=<<___; 2248 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" 2249 .comm OPENSSL_s390xcap_P,16,8 2250 ___ 2251 2252 $code =~ s/\`([^\`]*)\`/eval $1/gem; 2253 print $code; 2254 close STDOUT; # force flush 2255