#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and issued instruction [in this case
# load value from memory is critical] has to complete before execution
# flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support. It's basically a "teaser," as
# it can and should be improved in several ways. Most notably support
# for CBC is not utilized, nor multiple blocks are ever processed.
# Then software key schedule can be postponed till hardware support
# detection... Performance improvement over assembler is reportedly
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
# support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# critical path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
# Unlike previous version hardware support detection takes place only
# at the moment of key schedule setup, which is denoted in key->rounds.
# This is done, because deferred key setup can't be made MT-safe, not
# for key lengths longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives incredible performance improvement,
# it was measured to be ~6.6x. It's less than previously mentioned 8x,
# because software implementation was optimized.

# How this script works: assembly text is accumulated in $code (heredocs
# plus the generated tables), every `...` span in that text is replaced
# by the value of the Perl expression it contains, and the result is
# printed to STDOUT.

use strict;
use warnings;

# Set to a true value to omit every hardware-assisted (km/kmc) code path
# from the generated assembly.
my $softonly = 0;    # allow hardware support

# Register aliases interpolated into the assembly heredocs below.
# Several aliases deliberately name the same register: the argument
# names ($inp, $out, $bits) and $mask share registers with temporaries
# ($t0..$t3) that are used at other points of the generated code.
my $t0 = "%r0";    my $mask = "%r0";
my $t1 = "%r1";
my $t2 = "%r2";    my $inp  = "%r2";
my $t3 = "%r3";    my $out  = "%r3";    my $bits = "%r3";
my $key    = "%r4";
my $i1     = "%r5";
my $i2     = "%r6";
my $i3     = "%r7";
my $s0     = "%r8";
my $s1     = "%r9";
my $s2     = "%r10";
my $s3     = "%r11";
my $tbl    = "%r12";
my $rounds = "%r13";
my $ra     = "%r14";
my $sp     = "%r15";

# Accumulated assembly text; printed at the very end of the script.
our $code;

# _data_word(@words)
#
# Appends one ".long w,w" line to $code for every value in @words, i.e.
# each 32-bit table word is emitted twice and occupies 8 bytes. The round
# code below scales indices by 8 (mask 0xff<<3) and loads at byte offsets
# 0..3 inside an entry, which its comments label Te0..Te3 / Td0..Td3.
# Processing stops at the first undefined value. Returns nothing.
sub _data_word {
    for my $word (@_) {
        last unless defined $word;
        $code .= sprintf ".long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}

# Encryption table: 256 doubled words generated by _data_word, followed
# by the Te4 byte table and rcon[] emitted from the next heredoc.
$code=<<___;
.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);

# Te4 byte table and rcon[] (both still inside the AES_Te object, at
# offsets 2048 and 2048+256), then the public AES_encrypt entry label.
$code.=<<___;
# Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
# 		 const AES_KEY *key) {
.globl	AES_encrypt
.type	AES_encrypt,\@function
AES_encrypt:
___
# Hardware path: the word at 240(key) is compared against 16. Values
# below 16 fall through to the software code at .Lesoft; otherwise the
# word is used as the km function code (AES_set_encrypt_key stores
# 18/19/20 there when it selects hardware) for one 16-byte block.
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Lesoft:
___
# Software AES_encrypt wrapper (saves registers, loads the block, calls
# the core routine, stores the result) followed by the table-driven core
# _s390x_AES_encrypt: rounds-1 iterations of .Lenc_loop and a final round
# assembled from single bytes. The core advances $key and keeps its
# return address at 152($sp) because $ra is used as a temporary.
$code.=<<___;
	stmg	%r3,$ra,24($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt

	lg	$out,24($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lmg	%r6,$ra,48($sp)
	br	$ra
.size	AES_encrypt,.-AES_encrypt

.type	_s390x_AES_encrypt,\@function
.align	16
_s390x_AES_encrypt:
	stg	$ra,152($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Lenc_loop
.align	16
.Lenc_loop:
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	ngr	$t1,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	ngr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Te0[s0>>24]
	l	$t1,1($t1,$tbl)	# Te3[s0>>0]
	l	$t2,2($t2,$tbl)	# Te2[s0>>8]
	l	$t3,3($t3,$tbl)	# Te1[s0>>16]

	x	$s0,3($i1,$tbl)	# Te1[s1>>16]
	l	$s1,0($s1,$tbl)	# Te0[s1>>24]
	x	$t2,1($i2,$tbl)	# Te3[s1>>0]
	x	$t3,2($i3,$tbl)	# Te2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	nr	$s2,$mask
	ngr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	sllg	$t1,$s3,`0+3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	ngr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Te2[s2>>8]
	x	$s1,3($i2,$tbl)	# Te1[s2>>16]
	l	$s2,0($s2,$tbl)	# Te0[s2>>24]
	x	$t3,1($i3,$tbl)	# Te3[s2>>0]

	srlg	$i3,$s3,`16-3`	# i2
	xr	$s2,$t2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,1($t1,$tbl)	# Te3[s3>>0]
	x	$s1,2($ra,$tbl)	# Te2[s3>>8]
	x	$s2,3($i3,$tbl)	# Te1[s3>>16]
	l	$s3,0($s3,$tbl)	# Te0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Lenc_loop
	.align	16

	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	ngr	$t1,$mask
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	ngr	$i2,$mask
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	nr	$i3,$mask

	llgc	$s0,2($s0,$tbl)	# Te4[s0>>24]
	llgc	$t1,2($t1,$tbl)	# Te4[s0>>0]
	sll	$s0,24
	llgc	$t2,2($t2,$tbl)	# Te4[s0>>8]
	llgc	$t3,2($t3,$tbl)	# Te4[s0>>16]
	sll	$t2,8
	sll	$t3,16

	llgc	$i1,2($i1,$tbl)	# Te4[s1>>16]
	llgc	$s1,2($s1,$tbl)	# Te4[s1>>24]
	llgc	$i2,2($i2,$tbl)	# Te4[s1>>0]
	llgc	$i3,2($i3,$tbl)	# Te4[s1>>8]
	sll	$i1,16
	sll	$s1,24
	sll	$i3,8
	or	$s0,$i1
	or	$s1,$t1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	ngr	$i3,$mask
	nr	$s2,$mask

	sllg	$t1,$s3,`0+3`	# i0
	srlg	$ra,$s3,`8-3`	# i1
	ngr	$t1,$mask

	llgc	$i1,2($i1,$tbl)	# Te4[s2>>8]
	llgc	$i2,2($i2,$tbl)	# Te4[s2>>16]
	sll	$i1,8
	llgc	$s2,2($s2,$tbl)	# Te4[s2>>24]
	llgc	$i3,2($i3,$tbl)	# Te4[s2>>0]
	sll	$i2,16
	nr	$ra,$mask
	sll	$s2,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$t3,$i3

	srlg	$i3,$s3,`16-3`	# i2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	l	$t0,16($key)
	l	$t2,20($key)

	llgc	$i1,2($t1,$tbl)	# Te4[s3>>0]
	llgc	$i2,2($ra,$tbl)	# Te4[s3>>8]
	llgc	$i3,2($i3,$tbl)	# Te4[s3>>16]
	llgc	$s3,2($s3,$tbl)	# Te4[s3>>24]
	sll	$i2,8
	sll	$i3,16
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$i3
	or	$s3,$t3

	lg	$ra,152($sp)
	xr	$s0,$t0
	xr	$s1,$t2
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
___

# Decryption table: 256 doubled words generated by _data_word, followed
# by the Td4 byte table emitted from the next heredoc.
$code.=<<___;
.type	AES_Td,\@object
.align	256
AES_Td:
___
_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);

# Td4 byte table (offset 2048 inside AES_Td), then the public
# AES_decrypt entry label.
$code.=<<___;
# Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td

# void AES_decrypt(const unsigned char *inp, unsigned char *out,
# 		 const AES_KEY *key) {
.globl	AES_decrypt
.type	AES_decrypt,\@function
AES_decrypt:
___
# Hardware path for AES_decrypt; same dispatch on the word at 240(key)
# as in AES_encrypt (AES_set_decrypt_key ORs 0x80 into the stored code).
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Ldsoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Ldsoft:
___
# Software AES_decrypt wrapper and the table-driven core
# _s390x_AES_decrypt, structured like the encrypt pair above; the final
# round switches $mask to 0xff and indexes Td4 at offset 2048.
$code.=<<___;
	stmg	%r3,$ra,24($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Td
	bras	$ra,_s390x_AES_decrypt

	lg	$out,24($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lmg	%r6,$ra,48($sp)
	br	$ra
.size	AES_decrypt,.-AES_decrypt

.type	_s390x_AES_decrypt,\@function
.align	16
_s390x_AES_decrypt:
	stg	$ra,152($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Ldec_loop
.align	16
.Ldec_loop:
	srlg	$t1,$s0,`16-3`
	srlg	$t2,$s0,`8-3`
	sllg	$t3,$s0,`0+3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t1,$mask
	nr	$t2,$mask
	ngr	$t3,$mask

	sllg	$i1,$s1,`0+3`	# i0
	srlg	$i2,$s1,`16-3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	ngr	$i1,$mask
	nr	$s1,$mask
	nr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Td0[s0>>24]
	l	$t1,3($t1,$tbl)	# Td1[s0>>16]
	l	$t2,2($t2,$tbl)	# Td2[s0>>8]
	l	$t3,1($t3,$tbl)	# Td3[s0>>0]

	x	$s0,1($i1,$tbl)	# Td3[s1>>0]
	l	$s1,0($s1,$tbl)	# Td0[s1>>24]
	x	$t2,3($i2,$tbl)	# Td1[s1>>16]
	x	$t3,2($i3,$tbl)	# Td2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	sllg	$i2,$s2,`0+3`	# i1
	srlg	$i3,$s2,`16-3`
	srl	$s2,`24-3`
	nr	$i1,$mask
	ngr	$i2,$mask
	nr	$s2,$mask
	nr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	srlg	$t1,$s3,`16-3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	nr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Td2[s2>>8]
	x	$s1,1($i2,$tbl)	# Td3[s2>>0]
	l	$s2,0($s2,$tbl)	# Td0[s2>>24]
	x	$t3,3($i3,$tbl)	# Td1[s2>>16]

	sllg	$i3,$s3,`0+3`	# i2
	srl	$s3,`24-3`
	ngr	$i3,$mask
	nr	$s3,$mask

	xr	$s2,$t2
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,3($t1,$tbl)	# Td1[s3>>16]
	x	$s1,2($ra,$tbl)	# Td2[s3>>8]
	x	$s2,1($i3,$tbl)	# Td3[s3>>0]
	l	$s3,0($s3,$tbl)	# Td0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Ldec_loop
	.align	16

	l	$t1,`2048+0`($tbl)	# prefetch Td4
	l	$t2,`2048+64`($tbl)
	l	$t3,`2048+128`($tbl)
	l	$i1,`2048+192`($tbl)
	llill	$mask,0xff

	srlg	$i3,$s0,24	# i0
	srlg	$t1,$s0,16
	srlg	$t2,$s0,8
	nr	$s0,$mask	# i3
	nr	$t1,$mask

	srlg	$i1,$s1,24
	nr	$t2,$mask
	srlg	$i2,$s1,16
	srlg	$ra,$s1,8
	nr	$s1,$mask	# i0
	nr	$i2,$mask
	nr	$ra,$mask

	llgc	$i3,2048($i3,$tbl)	# Td4[s0>>24]
	llgc	$t1,2048($t1,$tbl)	# Td4[s0>>16]
	llgc	$t2,2048($t2,$tbl)	# Td4[s0>>8]
	sll	$t1,16
	llgc	$t3,2048($s0,$tbl)	# Td4[s0>>0]
	sllg	$s0,$i3,24
	sll	$t2,8

	llgc	$s1,2048($s1,$tbl)	# Td4[s1>>0]
	llgc	$i1,2048($i1,$tbl)	# Td4[s1>>24]
	llgc	$i2,2048($i2,$tbl)	# Td4[s1>>16]
	sll	$i1,24
	llgc	$i3,2048($ra,$tbl)	# Td4[s1>>8]
	sll	$i2,16
	sll	$i3,8
	or	$s0,$s1
	or	$t1,$i1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,8	# i0
	srlg	$i2,$s2,24
	srlg	$i3,$s2,16
	nr	$s2,$mask	# i1
	nr	$i1,$mask
	nr	$i3,$mask
	llgc	$i1,2048($i1,$tbl)	# Td4[s2>>8]
	llgc	$s1,2048($s2,$tbl)	# Td4[s2>>0]
	llgc	$i2,2048($i2,$tbl)	# Td4[s2>>24]
	llgc	$i3,2048($i3,$tbl)	# Td4[s2>>16]
	sll	$i1,8
	sll	$i2,24
	or	$s0,$i1
	sll	$i3,16
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s3,16	# i0
	srlg	$i2,$s3,8	# i1
	srlg	$i3,$s3,24
	nr	$s3,$mask	# i2
	nr	$i1,$mask
	nr	$i2,$mask

	lg	$ra,152($sp)
	or	$s1,$t1
	l	$t0,16($key)
	l	$t1,20($key)

	llgc	$i1,2048($i1,$tbl)	# Td4[s3>>16]
	llgc	$i2,2048($i2,$tbl)	# Td4[s3>>8]
	sll	$i1,16
	llgc	$s2,2048($s3,$tbl)	# Td4[s3>>0]
	llgc	$s3,2048($i3,$tbl)	# Td4[s3>>24]
	sll	$i2,8
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$s3,$t3

	xr	$s0,$t0
	xr	$s1,$t1
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
___

# AES_set_encrypt_key: argument validation. %r2 is set to -1 when the
# input or key pointer is zero and to -2 when bits is not 128, 192 or
# 256; on success the later paths return 0.
$code.=<<___;
# int AES_set_encrypt_key(const unsigned char *in, int bits,
# 		 AES_KEY *key) {
.globl	AES_set_encrypt_key
.type	AES_set_encrypt_key,\@function
.align	16
AES_set_encrypt_key:
	lghi	$t0,0
	clgr	$inp,$t0
	je	.Lminus1
	clgr	$key,$t0
	je	.Lminus1

	lghi	$t0,128
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,192
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,256
	clr	$bits,$t0
	je	.Lproceed
	lghi	%r2,-2
	br	%r14

.align	16
.Lproceed:
___
# Hardware detection: test OPENSSL_s390xcap_P, query the kmc capability
# vector into 16($sp) and check the bit for this key length. When it is
# set, the raw key is copied into the key structure, bits is stored at
# 236(key) and the function code at 240(key), and no schedule is built.
$code.=<<___ if (!$softonly);
	# convert bits to km code, [128,192,256]->[18,19,20]
	lhi	%r5,-128
	lhi	%r0,18
	ar	%r5,$bits
	srl	%r5,6
	ar	%r5,%r0

	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security assist
	jz	.Lekey_internal

	lghi	%r0,0		# query capability vector
	la	%r1,16($sp)
	.long	0xb92f0042	# kmc %r4,%r2

	llihh	%r1,0x8000
	srlg	%r1,%r1,0(%r5)
	ng	%r1,16($sp)
	jz	.Lekey_internal

	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
	stmg	%r0,%r1,0($key)
	lhi	%r0,192
	cr	$bits,%r0
	jl	1f
	lg	%r1,16($inp)
	stg	%r1,16($key)
	je	1f
	lg	%r1,24($inp)
	stg	%r1,24($key)
1:	st	$bits,236($key)	# save bits
	st	%r5,240($key)	# save km code
	lghi	%r2,0
	br	%r14
___
# Software key expansion (.Lekey_internal) for 128/192/256-bit keys,
# using Te4 at AES_Te+2048 and rcon[] at +256 from there, followed by
# AES_set_decrypt_key. The latter calls AES_set_encrypt_key and passes a
# non-zero status straight back to its caller.
$code.=<<___;
.align	16
.Lekey_internal:
	stmg	%r6,%r13,48($sp)	# all non-volatile regs

	larl	$tbl,AES_Te+2048

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	st	$s0,0($key)
	st	$s1,4($key)
	st	$s2,8($key)
	st	$s3,12($key)
	lghi	$t0,128
	cr	$bits,$t0
	jne	.Lnot128

	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,10
	st	$rounds,240($key)

	llgfr	$t2,$s3			# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t2,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L128_loop:
	la	$t2,0($t2,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t2,2,0($t2)		# Te4[rk[3]>>0]<<8
	icm	$t2,4,0($i1)		# Te4[rk[3]>>8]<<16
	icm	$t2,8,0($i2)		# Te4[rk[3]>>16]<<24
	icm	$t2,1,0($i3)		# Te4[rk[3]>>24]
	x	$t2,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t2			# rk[4]=rk[0]^...
	xr	$s1,$s0			# rk[5]=rk[1]^rk[4]
	xr	$s2,$s1			# rk[6]=rk[2]^rk[5]
	xr	$s3,$s2			# rk[7]=rk[3]^rk[6]

	llgfr	$t2,$s3			# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	nr	$t2,$mask
	nr	$i1,$mask
	srlg	$i3,$s3,24
	nr	$i2,$mask

	st	$s0,16($key)
	st	$s1,20($key)
	st	$s2,24($key)
	st	$s3,28($key)
	la	$key,16($key)		# key+=4
	la	$t3,4($t3)		# i++
	brct	$rounds,.L128_loop
	lghi	%r2,0
	lmg	%r6,%r13,48($sp)
	br	$ra

.align	16
.Lnot128:
	llgf	$t0,16($inp)
	llgf	$t1,20($inp)
	st	$t0,16($key)
	st	$t1,20($key)
	lghi	$t0,192
	cr	$bits,$t0
	jne	.Lnot192

	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,12
	st	$rounds,240($key)
	lghi	$rounds,8

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L192_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)		# Te4[rk[5]>>0]<<8
	icm	$t1,4,0($i1)		# Te4[rk[5]>>8]<<16
	icm	$t1,8,0($i2)		# Te4[rk[5]>>16]<<24
	icm	$t1,1,0($i3)		# Te4[rk[5]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1			# rk[6]=rk[0]^...
	xr	$s1,$s0			# rk[7]=rk[1]^rk[6]
	xr	$s2,$s1			# rk[8]=rk[2]^rk[7]
	xr	$s3,$s2			# rk[9]=rk[3]^rk[8]

	st	$s0,24($key)
	st	$s1,28($key)
	st	$s2,32($key)
	st	$s3,36($key)
	brct	$rounds,.L192_continue
	lghi	%r2,0
	lmg	%r6,%r13,48($sp)
	br	$ra

.align	16
.L192_continue:
	lgr	$t1,$s3
	x	$t1,16($key)		# rk[10]=rk[4]^rk[9]
	st	$t1,40($key)
	x	$t1,20($key)		# rk[11]=rk[5]^rk[10]
	st	$t1,44($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,24($key)		# key+=6
	la	$t3,4($t3)		# i++
	j	.L192_loop

.align	16
.Lnot192:
	llgf	$t0,24($inp)
	llgf	$t1,28($inp)
	st	$t0,24($key)
	st	$t1,28($key)
	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,14
	st	$rounds,240($key)
	lghi	$rounds,7

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L256_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)		# Te4[rk[7]>>0]<<8
	icm	$t1,4,0($i1)		# Te4[rk[7]>>8]<<16
	icm	$t1,8,0($i2)		# Te4[rk[7]>>16]<<24
	icm	$t1,1,0($i3)		# Te4[rk[7]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1			# rk[8]=rk[0]^...
	xr	$s1,$s0			# rk[9]=rk[1]^rk[8]
	xr	$s2,$s1			# rk[10]=rk[2]^rk[9]
	xr	$s3,$s2			# rk[11]=rk[3]^rk[10]
	st	$s0,32($key)
	st	$s1,36($key)
	st	$s2,40($key)
	st	$s3,44($key)
	brct	$rounds,.L256_continue
	lghi	%r2,0
	lmg	%r6,%r13,48($sp)
	br	$ra

.align	16
.L256_continue:
	lgr	$t1,$s3			# temp=rk[11]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	llgc	$t1,0($t1)		# Te4[rk[11]>>0]
	icm	$t1,2,0($i1)		# Te4[rk[11]>>8]<<8
	icm	$t1,4,0($i2)		# Te4[rk[11]>>16]<<16
	icm	$t1,8,0($i3)		# Te4[rk[11]>>24]<<24
	x	$t1,16($key)		# rk[12]=rk[4]^...
	st	$t1,48($key)
	x	$t1,20($key)		# rk[13]=rk[5]^rk[12]
	st	$t1,52($key)
	x	$t1,24($key)		# rk[14]=rk[6]^rk[13]
	st	$t1,56($key)
	x	$t1,28($key)		# rk[15]=rk[7]^rk[14]
	st	$t1,60($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,32($key)		# key+=8
	la	$t3,4($t3)		# i++
	j	.L256_loop

.Lminus1:
	lghi	%r2,-1
	br	$ra
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

# int AES_set_decrypt_key(const unsigned char *in, int bits,
# 		 AES_KEY *key) {
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,\@function
.align	16
AES_set_decrypt_key:
	stg	$key,32($sp)	# I rely on AES_set_encrypt_key to
	stg	$ra,112($sp)	# save non-volatile registers!
	bras	$ra,AES_set_encrypt_key
	lg	$key,32($sp)
	lg	$ra,112($sp)
	ltgr	%r2,%r2
	bnzr	$ra
___
# If AES_set_encrypt_key stored a function code (>= 16) at 240(key), set
# the 0x80 "decrypt" bit in it and return; otherwise continue at .Lgo.
# NOTE(review): nothing in this file branches to .Ldkey_internal, so the
# instructions after that label look unreachable; they are kept as-is so
# the generated code does not change.
$code.=<<___ if (!$softonly);
	l	$t0,240($key)
	lhi	$t1,16
	cr	$t0,$t1
	jl	.Lgo
	oill	$t0,0x80	# set "decrypt" bit
	st	$t0,240($key)
	br	$ra

.align	16
.Ldkey_internal:
	stg	$key,32($sp)
	stg	$ra,40($sp)
	bras	$ra,.Lekey_internal
	lg	$key,32($sp)
	lg	$ra,40($sp)
___
# .Linv swaps 16-byte round keys between the two ends of the schedule,
# walking towards the middle.
$code.=<<___;

.Lgo:	llgf	$rounds,240($key)
	la	$i1,0($key)
	sllg	$i2,$rounds,4
	la	$i2,0($i2,$key)
	srl	$rounds,1
	lghi	$t1,-16

.align	16
.Linv:	lmg	$s0,$s1,0($i1)
	lmg	$s2,$s3,0($i2)
	stmg	$s0,$s1,0($i2)
	stmg	$s2,$s3,0($i1)
	la	$i1,16($i1)
	la	$i2,0($t1,$i2)
	brct	$rounds,.Linv
___
# The index registers are free after .Linv, so they are reused to hold
# the byte-replicated constants needed by the .Lmix loop.
my $mask80 = $i1;
my $mask1b = $i2;
my $maskfe = $i3;
# .Lmix rewrites (rounds-1)*4 key words starting at offset 16, computing
# tp2/tp4/tp8 from each word with the masks above and combining them
# with rotations as spelled out in the per-line comments.
$code.=<<___;
	llgf	$rounds,240($key)
	aghi	$rounds,-1
	sll	$rounds,2	# (rounds-1)*4
	llilh	$mask80,0x8080
	llilh	$mask1b,0x1b1b
	llilh	$maskfe,0xfefe
	oill	$mask80,0x8080
	oill	$mask1b,0x1b1b
	oill	$maskfe,0xfefe

.align	16
.Lmix:	l	$s0,16($key)	# tp1
	lr	$s1,$s0
	ngr	$s1,$mask80
	srlg	$t1,$s1,7
	slr	$s1,$t1
	nr	$s1,$mask1b
	sllg	$t1,$s0,1
	nr	$t1,$maskfe
	xr	$s1,$t1		# tp2

	lr	$s2,$s1
	ngr	$s2,$mask80
	srlg	$t1,$s2,7
	slr	$s2,$t1
	nr	$s2,$mask1b
	sllg	$t1,$s1,1
	nr	$t1,$maskfe
	xr	$s2,$t1		# tp4

	lr	$s3,$s2
	ngr	$s3,$mask80
	srlg	$t1,$s3,7
	slr	$s3,$t1
	nr	$s3,$mask1b
	sllg	$t1,$s2,1
	nr	$t1,$maskfe
	xr	$s3,$t1		# tp8

	xr	$s1,$s0		# tp2^tp1
	xr	$s2,$s0		# tp4^tp1
	rll	$s0,$s0,24	# = ROTATE(tp1,8)
	xr	$s2,$s3		# ^=tp8
	xr	$s0,$s1		# ^=tp2^tp1
	xr	$s1,$s3		# tp2^tp1^tp8
	xr	$s0,$s2		# ^=tp4^tp1^tp8
	rll	$s1,$s1,8
	rll	$s2,$s2,16
	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
	rll	$s3,$s3,24
	xr	$s0,$s2		# ^= ROTATE(tp8^tp4^tp1,16)
	xr	$s0,$s3		# ^= ROTATE(tp8,8)

	st	$s0,16($key)
	la	$key,4($key)
	brct	$rounds,.Lmix

	lmg	%r6,%r13,48($sp)	# as was saved by AES_set_encrypt_key!
	lghi	%r2,0
	br	$ra
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                     size_t length, const AES_KEY *key,
#                     unsigned char *ivec, const int enc)
{
# Register aliases local to AES_cbc_encrypt; they shadow the file-level
# ones. The first three instructions of the routine swap %r3 and %r4, so
# from then on $out is %r4 and $len is %r3.
my $inp="%r2";
my $out="%r4";	# length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function
.align	16
AES_cbc_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
# Hardware CBC via kmc when the word at 240(key) is >= 16. The parameter
# block (ivec followed by the key) is built at 16($sp); a trailing
# partial block is staged through the 16-byte buffer at 128($sp).
$code.=<<___ if (!$softonly);
	lhi	%r0,16
	cl	%r0,240($key)
	jh	.Lcbc_software

	lg	%r0,0($ivp)	# copy ivec
	lg	%r1,8($ivp)
	stmg	%r0,%r1,16($sp)
	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
	stmg	%r0,%r1,32($sp)
	lmg	%r0,%r1,16($key)
	stmg	%r0,%r1,48($sp)
	l	%r0,240($key)	# load kmc code
	lghi	$key,15		# res=len%16, len-=res;
	ngr	$key,$len
	slgr	$len,$key
	la	%r1,16($sp)	# parameter block - ivec || key
	jz	.Lkmc_truncated
	.long	0xb92f0042	# kmc %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"
	ltr	$key,$key
	jnz	.Lkmc_truncated
.Lkmc_done:
	lmg	%r0,%r1,16($sp)	# copy ivec to caller
	stg	%r0,0($ivp)
	stg	%r1,8($ivp)
	br	$ra
.align	16
.Lkmc_truncated:
	ahi	$key,-1		# it's the way it's encoded in mvc
	tmll	%r0,0x80
	jnz	.Lkmc_truncated_dec
	lghi	%r1,0
	stg	%r1,128($sp)
	stg	%r1,136($sp)
	bras	%r1,1f
	mvc	128(1,$sp),0($inp)
1:	ex	$key,0(%r1)
	la	%r1,16($sp)	# restore parameter block
	la	$inp,128($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	j	.Lkmc_done
.align	16
.Lkmc_truncated_dec:
	stg	$out,64($sp)
	la	$out,128($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	lg	$out,64($sp)
	bras	%r1,2f
	mvc	0(1,$out),128($sp)
2:	ex	$key,0(%r1)
	j	.Lkmc_done
.align	16
.Lcbc_software:
___
# Software CBC. The word at 164($sp) is compared with zero to choose
# between the encrypt loop and .Lcbc_decrypt; both loops call the
# _s390x_AES_* cores with the key pointer copied into %r4, and both
# handle a trailing partial block through the buffer at 128($sp).
$code.=<<___;
	stmg	$key,$ra,40($sp)
	lhi	%r0,0
	cl	%r0,164($sp)
	je	.Lcbc_decrypt

	larl	$tbl,AES_Te

	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	llgf	$s3,12($ivp)

	lghi	$t0,16
	slgr	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
.Lcbc_enc_loop:
	stmg	$inp,$out,16($sp)
	x	$s0,0($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lmg	$inp,$key,16($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	la	$inp,16($inp)
	la	$out,16($out)
	lghi	$t0,16
	ltgr	$len,$len
	jz	.Lcbc_enc_done
	slgr	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
	j	.Lcbc_enc_loop
.align	16
.Lcbc_enc_done:
	lg	$ivp,48($sp)
	st	$s0,0($ivp)
	st	$s1,4($ivp)
	st	$s2,8($ivp)
	st	$s3,12($ivp)

	lmg	%r7,$ra,56($sp)
	br	$ra

.align	16
.Lcbc_enc_tail:
	aghi	$len,15
	lghi	$t0,0
	stg	$t0,128($sp)
	stg	$t0,136($sp)
	bras	$t1,3f
	mvc	128(1,$sp),0($inp)
3:	ex	$len,0($t1)
	lghi	$len,0
	la	$inp,128($sp)
	j	.Lcbc_enc_loop

.align	16
.Lcbc_decrypt:
	larl	$tbl,AES_Td

	lg	$t0,0($ivp)
	lg	$t1,8($ivp)
	stmg	$t0,$t1,128($sp)

.Lcbc_dec_loop:
	stmg	$inp,$out,16($sp)
	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_decrypt

	lmg	$inp,$key,16($sp)
	sllg	$s0,$s0,32
	sllg	$s2,$s2,32
	lr	$s0,$s1
	lr	$s2,$s3

	lg	$t0,0($inp)
	lg	$t1,8($inp)
	xg	$s0,128($sp)
	xg	$s2,136($sp)
	lghi	$s1,16
	slgr	$len,$s1
	brc	4,.Lcbc_dec_tail	# if borrow
	brc	2,.Lcbc_dec_done	# if zero
	stg	$s0,0($out)
	stg	$s2,8($out)
	stmg	$t0,$t1,128($sp)

	la	$inp,16($inp)
	la	$out,16($out)
	j	.Lcbc_dec_loop

.Lcbc_dec_done:
	stg	$s0,0($out)
	stg	$s2,8($out)
.Lcbc_dec_exit:
	lmg	$ivp,$ra,48($sp)
	stmg	$t0,$t1,0($ivp)

	br	$ra

.align	16
.Lcbc_dec_tail:
	aghi	$len,15
	stg	$s0,128($sp)
	stg	$s2,136($sp)
	bras	$s1,4f
	mvc	0(1,$out),128($sp)
4:	ex	$len,0($s1)
	j	.Lcbc_dec_exit
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
.comm	OPENSSL_s390xcap_P,8,8
___
}
$code.=<<___;
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every `expr` span in the accumulated text with the value of
# the Perl expression inside it (e.g. `0xff<<3`, `16-3`, `2048+64`),
# then write the finished assembly to STDOUT.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;