Home | History | Annotate | Download | only in rijndael
      1 /*	$NetBSD: rijndael-alg-fst.c,v 1.4 2006/09/09 16:22:36 manu Exp $	*/
      2 
      3 /*	$KAME: rijndael-alg-fst.c,v 1.1.1.1 2001/08/08 09:56:23 sakane Exp $	*/
      4 
      5 /*
      6  * rijndael-alg-fst.c   v2.3   April '2000
      7  *
      8  * Optimised ANSI C code
      9  *
     10  * authors: v1.0: Antoon Bosselaers
     11  *          v2.0: Vincent Rijmen
     12  *          v2.3: Paulo Barreto
     13  *
     14  * This code is placed in the public domain.
     15  */
     16 
     17 #include "config.h"
     18 
     19 #include <sys/cdefs.h>
     20 #include <sys/types.h>
     21 #ifdef _KERNEL
     22 #include <sys/systm.h>
     23 #else
     24 #include <string.h>
     25 #endif
     26 #include <crypto/rijndael/rijndael-alg-fst.h>
     27 #include <crypto/rijndael/rijndael_local.h>
     28 
     29 #include <crypto/rijndael/boxes-fst.dat>
     30 
     31 #include <err.h>
     32 #define bcopy(a, b, c) memcpy((b), (a), (c))
     33 #define bzero(a, b) memset((a), 0, (b))
     34 #define panic(a) err(1, (a))
     35 
     36 int rijndaelKeySched(word8 k[MAXKC][4], word8 W[MAXROUNDS+1][4][4], int ROUNDS) {
     37 	/* Calculate the necessary round keys
     38 	 * The number of calculations depends on keyBits and blockBits
     39 	 */
     40 	int j, r, t, rconpointer = 0;
     41 	union {
     42 		word8	x8[MAXKC][4];
     43 		word32	x32[MAXKC];
     44 	} xtk;
     45 #define	tk	xtk.x8
     46 	int KC = ROUNDS - 6;
     47 
     48 	for (j = KC-1; j >= 0; j--) {
     49 		*((word32*)tk[j]) = *((word32*)k[j]);
     50 	}
     51 	r = 0;
     52 	t = 0;
     53 	/* copy values into round key array */
     54 	for (j = 0; (j < KC) && (r < ROUNDS + 1); ) {
     55 		for (; (j < KC) && (t < 4); j++, t++) {
     56 			*((word32*)W[r][t]) = *((word32*)tk[j]);
     57 		}
     58 		if (t == 4) {
     59 			r++;
     60 			t = 0;
     61 		}
     62 	}
     63 
     64 	while (r < ROUNDS + 1) { /* while not enough round key material calculated */
     65 		/* calculate new values */
     66 		tk[0][0] ^= S[tk[KC-1][1]];
     67 		tk[0][1] ^= S[tk[KC-1][2]];
     68 		tk[0][2] ^= S[tk[KC-1][3]];
     69 		tk[0][3] ^= S[tk[KC-1][0]];
     70 		tk[0][0] ^= rcon[rconpointer++];
     71 
     72 		if (KC != 8) {
     73 			for (j = 1; j < KC; j++) {
     74 				*((word32*)tk[j]) ^= *((word32*)tk[j-1]);
     75 			}
     76 		} else {
     77 			for (j = 1; j < KC/2; j++) {
     78 				*((word32*)tk[j]) ^= *((word32*)tk[j-1]);
     79 			}
     80 			tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
     81 			tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
     82 			tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
     83 			tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
     84 			for (j = KC/2 + 1; j < KC; j++) {
     85 				*((word32*)tk[j]) ^= *((word32*)tk[j-1]);
     86 			}
     87 		}
     88 		/* copy values into round key array */
     89 		for (j = 0; (j < KC) && (r < ROUNDS + 1); ) {
     90 			for (; (j < KC) && (t < 4); j++, t++) {
     91 				*((word32*)W[r][t]) = *((word32*)tk[j]);
     92 			}
     93 			if (t == 4) {
     94 				r++;
     95 				t = 0;
     96 			}
     97 		}
     98 	}
     99 	return 0;
    100 #undef tk
    101 }
    102 
    103 int rijndaelKeyEncToDec(word8 W[MAXROUNDS+1][4][4], int ROUNDS) {
    104 	int r;
    105 	word8 *w;
    106 
    107 	for (r = 1; r < ROUNDS; r++) {
    108 		w = W[r][0];
    109 		*((word32*)w) =
    110 			  *((const word32*)U1[w[0]])
    111 			^ *((const word32*)U2[w[1]])
    112 			^ *((const word32*)U3[w[2]])
    113 			^ *((const word32*)U4[w[3]]);
    114 
    115 		w = W[r][1];
    116 		*((word32*)w) =
    117 			  *((const word32*)U1[w[0]])
    118 			^ *((const word32*)U2[w[1]])
    119 			^ *((const word32*)U3[w[2]])
    120 			^ *((const word32*)U4[w[3]]);
    121 
    122 		w = W[r][2];
    123 		*((word32*)w) =
    124 			  *((const word32*)U1[w[0]])
    125 			^ *((const word32*)U2[w[1]])
    126 			^ *((const word32*)U3[w[2]])
    127 			^ *((const word32*)U4[w[3]]);
    128 
    129 		w = W[r][3];
    130 		*((word32*)w) =
    131 			  *((const word32*)U1[w[0]])
    132 			^ *((const word32*)U2[w[1]])
    133 			^ *((const word32*)U3[w[2]])
    134 			^ *((const word32*)U4[w[3]]);
    135 	}
    136 	return 0;
    137 }
    138 
    139 /**
    140  * Encrypt a single block.
    141  */
    142 int rijndaelEncrypt(word8 in[16], word8 out[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) {
    143 	int r;
    144 	union {
    145 		word8	x8[16];
    146 		word32	x32[4];
    147 	} xa, xb;
    148 #define	a	xa.x8
    149 #define	b	xb.x8
    150 	union {
    151 		word8	x8[4][4];
    152 		word32	x32[4];
    153 	} xtemp;
    154 #define	temp	xtemp.x8
    155 
    156     memcpy(a, in, sizeof a);
    157 
    158     *((word32*)temp[0]) = *((word32*)(a   )) ^ *((word32*)rk[0][0]);
    159     *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[0][1]);
    160     *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[0][2]);
    161     *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[0][3]);
    162     *((word32*)(b    )) = *((const word32*)T1[temp[0][0]])
    163 					^ *((const word32*)T2[temp[1][1]])
    164 					^ *((const word32*)T3[temp[2][2]])
    165 					^ *((const word32*)T4[temp[3][3]]);
    166     *((word32*)(b + 4)) = *((const word32*)T1[temp[1][0]])
    167 					^ *((const word32*)T2[temp[2][1]])
    168 					^ *((const word32*)T3[temp[3][2]])
    169 					^ *((const word32*)T4[temp[0][3]]);
    170     *((word32*)(b + 8)) = *((const word32*)T1[temp[2][0]])
    171 					^ *((const word32*)T2[temp[3][1]])
    172 					^ *((const word32*)T3[temp[0][2]])
    173 					^ *((const word32*)T4[temp[1][3]]);
    174     *((word32*)(b +12)) = *((const word32*)T1[temp[3][0]])
    175 					^ *((const word32*)T2[temp[0][1]])
    176 					^ *((const word32*)T3[temp[1][2]])
    177 					^ *((const word32*)T4[temp[2][3]]);
    178 	for (r = 1; r < ROUNDS-1; r++) {
    179 		*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[r][0]);
    180 		*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]);
    181 		*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]);
    182 		*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]);
    183 
    184 		*((word32*)(b    )) = *((const word32*)T1[temp[0][0]])
    185 					^ *((const word32*)T2[temp[1][1]])
    186 					^ *((const word32*)T3[temp[2][2]])
    187 					^ *((const word32*)T4[temp[3][3]]);
    188 		*((word32*)(b + 4)) = *((const word32*)T1[temp[1][0]])
    189 					^ *((const word32*)T2[temp[2][1]])
    190 					^ *((const word32*)T3[temp[3][2]])
    191 					^ *((const word32*)T4[temp[0][3]]);
    192 		*((word32*)(b + 8)) = *((const word32*)T1[temp[2][0]])
    193 					^ *((const word32*)T2[temp[3][1]])
    194 					^ *((const word32*)T3[temp[0][2]])
    195 					^ *((const word32*)T4[temp[1][3]]);
    196 		*((word32*)(b +12)) = *((const word32*)T1[temp[3][0]])
    197 					^ *((const word32*)T2[temp[0][1]])
    198 					^ *((const word32*)T3[temp[1][2]])
    199 					^ *((const word32*)T4[temp[2][3]]);
    200 	}
    201 	/* last round is special */
    202 	*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[ROUNDS-1][0]);
    203 	*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[ROUNDS-1][1]);
    204 	*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[ROUNDS-1][2]);
    205 	*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[ROUNDS-1][3]);
    206 	b[ 0] = T1[temp[0][0]][1];
    207 	b[ 1] = T1[temp[1][1]][1];
    208 	b[ 2] = T1[temp[2][2]][1];
    209 	b[ 3] = T1[temp[3][3]][1];
    210 	b[ 4] = T1[temp[1][0]][1];
    211 	b[ 5] = T1[temp[2][1]][1];
    212 	b[ 6] = T1[temp[3][2]][1];
    213 	b[ 7] = T1[temp[0][3]][1];
    214 	b[ 8] = T1[temp[2][0]][1];
    215 	b[ 9] = T1[temp[3][1]][1];
    216 	b[10] = T1[temp[0][2]][1];
    217 	b[11] = T1[temp[1][3]][1];
    218 	b[12] = T1[temp[3][0]][1];
    219 	b[13] = T1[temp[0][1]][1];
    220 	b[14] = T1[temp[1][2]][1];
    221 	b[15] = T1[temp[2][3]][1];
    222 	*((word32*)(b   )) ^= *((word32*)rk[ROUNDS][0]);
    223 	*((word32*)(b+ 4)) ^= *((word32*)rk[ROUNDS][1]);
    224 	*((word32*)(b+ 8)) ^= *((word32*)rk[ROUNDS][2]);
    225 	*((word32*)(b+12)) ^= *((word32*)rk[ROUNDS][3]);
    226 
    227 	memcpy(out, b, sizeof b /* XXX out */);
    228 
    229 	return 0;
    230 #undef a
    231 #undef b
    232 #undef temp
    233 }
    234 
    235 #ifdef INTERMEDIATE_VALUE_KAT
    236 /**
    237  * Encrypt only a certain number of rounds.
    238  * Only used in the Intermediate Value Known Answer Test.
    239  */
    240 int rijndaelEncryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) {
    241 	int r;
    242 	word8 temp[4][4];
    243 
    244 	/* make number of rounds sane */
    245 	if (rounds > ROUNDS) {
    246 		rounds = ROUNDS;
    247 	}
    248 
    249 	*((word32*)a[0]) = *((word32*)a[0]) ^ *((word32*)rk[0][0]);
    250 	*((word32*)a[1]) = *((word32*)a[1]) ^ *((word32*)rk[0][1]);
    251 	*((word32*)a[2]) = *((word32*)a[2]) ^ *((word32*)rk[0][2]);
    252 	*((word32*)a[3]) = *((word32*)a[3]) ^ *((word32*)rk[0][3]);
    253 
    254 	for (r = 1; (r <= rounds) && (r < ROUNDS); r++) {
    255 		*((word32*)temp[0]) = *((const word32*)T1[a[0][0]])
    256 					   ^ *((const word32*)T2[a[1][1]])
    257 					   ^ *((const word32*)T3[a[2][2]])
    258 					   ^ *((const word32*)T4[a[3][3]]);
    259 		*((word32*)temp[1]) = *((const word32*)T1[a[1][0]])
    260 					   ^ *((const word32*)T2[a[2][1]])
    261 					   ^ *((const word32*)T3[a[3][2]])
    262 					   ^ *((const word32*)T4[a[0][3]]);
    263 		*((word32*)temp[2]) = *((const word32*)T1[a[2][0]])
    264 					   ^ *((const word32*)T2[a[3][1]])
    265 					   ^ *((const word32*)T3[a[0][2]])
    266 					   ^ *((const word32*)T4[a[1][3]]);
    267 		*((word32*)temp[3]) = *((const word32*)T1[a[3][0]])
    268 					   ^ *((const word32*)T2[a[0][1]])
    269 					   ^ *((const word32*)T3[a[1][2]])
    270 					   ^ *((const word32*)T4[a[2][3]]);
    271 		*((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[r][0]);
    272 		*((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[r][1]);
    273 		*((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[r][2]);
    274 		*((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[r][3]);
    275 	}
    276 	if (rounds == ROUNDS) {
    277 	   	/* last round is special */
    278 	   	temp[0][0] = T1[a[0][0]][1];
    279 	   	temp[0][1] = T1[a[1][1]][1];
    280 	   	temp[0][2] = T1[a[2][2]][1];
    281 	   	temp[0][3] = T1[a[3][3]][1];
    282 	   	temp[1][0] = T1[a[1][0]][1];
    283 	   	temp[1][1] = T1[a[2][1]][1];
    284 	   	temp[1][2] = T1[a[3][2]][1];
    285 	   	temp[1][3] = T1[a[0][3]][1];
    286 	   	temp[2][0] = T1[a[2][0]][1];
    287 	   	temp[2][1] = T1[a[3][1]][1];
    288 	   	temp[2][2] = T1[a[0][2]][1];
    289 	   	temp[2][3] = T1[a[1][3]][1];
    290 	   	temp[3][0] = T1[a[3][0]][1];
    291 	   	temp[3][1] = T1[a[0][1]][1];
    292 	   	temp[3][2] = T1[a[1][2]][1];
    293 	   	temp[3][3] = T1[a[2][3]][1];
    294 		*((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[ROUNDS][0]);
    295 		*((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[ROUNDS][1]);
    296 		*((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[ROUNDS][2]);
    297 		*((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[ROUNDS][3]);
    298 	}
    299 
    300 	return 0;
    301 }
    302 #endif /* INTERMEDIATE_VALUE_KAT */
    303 
    304 /**
    305  * Decrypt a single block.
    306  */
    307 int rijndaelDecrypt(word8 in[16], word8 out[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) {
    308 	int r;
    309 	union {
    310 		word8	x8[16];
    311 		word32	x32[4];
    312 	} xa, xb;
    313 #define	a	xa.x8
    314 #define	b	xb.x8
    315 	union {
    316 		word8	x8[4][4];
    317 		word32	x32[4];
    318 	} xtemp;
    319 #define	temp	xtemp.x8
    320 
    321     memcpy(a, in, sizeof a);
    322 
    323     *((word32*)temp[0]) = *((word32*)(a   )) ^ *((word32*)rk[ROUNDS][0]);
    324     *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[ROUNDS][1]);
    325     *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[ROUNDS][2]);
    326     *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[ROUNDS][3]);
    327 
    328     *((word32*)(b   )) = *((const word32*)T5[temp[0][0]])
    329            ^ *((const word32*)T6[temp[3][1]])
    330            ^ *((const word32*)T7[temp[2][2]])
    331            ^ *((const word32*)T8[temp[1][3]]);
    332 	*((word32*)(b+ 4)) = *((const word32*)T5[temp[1][0]])
    333            ^ *((const word32*)T6[temp[0][1]])
    334            ^ *((const word32*)T7[temp[3][2]])
    335            ^ *((const word32*)T8[temp[2][3]]);
    336 	*((word32*)(b+ 8)) = *((const word32*)T5[temp[2][0]])
    337            ^ *((const word32*)T6[temp[1][1]])
    338            ^ *((const word32*)T7[temp[0][2]])
    339            ^ *((const word32*)T8[temp[3][3]]);
    340 	*((word32*)(b+12)) = *((const word32*)T5[temp[3][0]])
    341            ^ *((const word32*)T6[temp[2][1]])
    342            ^ *((const word32*)T7[temp[1][2]])
    343            ^ *((const word32*)T8[temp[0][3]]);
    344 	for (r = ROUNDS-1; r > 1; r--) {
    345 		*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[r][0]);
    346 		*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]);
    347 		*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]);
    348 		*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]);
    349 		*((word32*)(b   )) = *((const word32*)T5[temp[0][0]])
    350 		   ^ *((const word32*)T6[temp[3][1]])
    351 		   ^ *((const word32*)T7[temp[2][2]])
    352 		   ^ *((const word32*)T8[temp[1][3]]);
    353 		*((word32*)(b+ 4)) = *((const word32*)T5[temp[1][0]])
    354 		   ^ *((const word32*)T6[temp[0][1]])
    355 		   ^ *((const word32*)T7[temp[3][2]])
    356 		   ^ *((const word32*)T8[temp[2][3]]);
    357 		*((word32*)(b+ 8)) = *((const word32*)T5[temp[2][0]])
    358 		   ^ *((const word32*)T6[temp[1][1]])
    359 		   ^ *((const word32*)T7[temp[0][2]])
    360 		   ^ *((const word32*)T8[temp[3][3]]);
    361 		*((word32*)(b+12)) = *((const word32*)T5[temp[3][0]])
    362 		   ^ *((const word32*)T6[temp[2][1]])
    363 		   ^ *((const word32*)T7[temp[1][2]])
    364 		   ^ *((const word32*)T8[temp[0][3]]);
    365 	}
    366 	/* last round is special */
    367 	*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[1][0]);
    368 	*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[1][1]);
    369 	*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[1][2]);
    370 	*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[1][3]);
    371 	b[ 0] = S5[temp[0][0]];
    372 	b[ 1] = S5[temp[3][1]];
    373 	b[ 2] = S5[temp[2][2]];
    374 	b[ 3] = S5[temp[1][3]];
    375 	b[ 4] = S5[temp[1][0]];
    376 	b[ 5] = S5[temp[0][1]];
    377 	b[ 6] = S5[temp[3][2]];
    378 	b[ 7] = S5[temp[2][3]];
    379 	b[ 8] = S5[temp[2][0]];
    380 	b[ 9] = S5[temp[1][1]];
    381 	b[10] = S5[temp[0][2]];
    382 	b[11] = S5[temp[3][3]];
    383 	b[12] = S5[temp[3][0]];
    384 	b[13] = S5[temp[2][1]];
    385 	b[14] = S5[temp[1][2]];
    386 	b[15] = S5[temp[0][3]];
    387 	*((word32*)(b   )) ^= *((word32*)rk[0][0]);
    388 	*((word32*)(b+ 4)) ^= *((word32*)rk[0][1]);
    389 	*((word32*)(b+ 8)) ^= *((word32*)rk[0][2]);
    390 	*((word32*)(b+12)) ^= *((word32*)rk[0][3]);
    391 
    392 	memcpy(out, b, sizeof b /* XXX out */);
    393 
    394 	return 0;
    395 #undef a
    396 #undef b
    397 #undef temp
    398 }
    399 
    400 
    401 #ifdef INTERMEDIATE_VALUE_KAT
    402 /**
    403  * Decrypt only a certain number of rounds.
    404  * Only used in the Intermediate Value Known Answer Test.
    405  * Operations rearranged such that the intermediate values
    406  * of decryption correspond with the intermediate values
    407  * of encryption.
    408  */
    409 int rijndaelDecryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) {
    410 	int r, i;
    411 	word8 temp[4], shift;
    412 
    413 	/* make number of rounds sane */
    414 	if (rounds > ROUNDS) {
    415 		rounds = ROUNDS;
    416 	}
    417     /* first round is special: */
    418 	*(word32 *)a[0] ^= *(word32 *)rk[ROUNDS][0];
    419 	*(word32 *)a[1] ^= *(word32 *)rk[ROUNDS][1];
    420 	*(word32 *)a[2] ^= *(word32 *)rk[ROUNDS][2];
    421 	*(word32 *)a[3] ^= *(word32 *)rk[ROUNDS][3];
    422 	for (i = 0; i < 4; i++) {
    423 		a[i][0] = Si[a[i][0]];
    424 		a[i][1] = Si[a[i][1]];
    425 		a[i][2] = Si[a[i][2]];
    426 		a[i][3] = Si[a[i][3]];
    427 	}
    428 	for (i = 1; i < 4; i++) {
    429 		shift = (4 - i) & 3;
    430 		temp[0] = a[(0 + shift) & 3][i];
    431 		temp[1] = a[(1 + shift) & 3][i];
    432 		temp[2] = a[(2 + shift) & 3][i];
    433 		temp[3] = a[(3 + shift) & 3][i];
    434 		a[0][i] = temp[0];
    435 		a[1][i] = temp[1];
    436 		a[2][i] = temp[2];
    437 		a[3][i] = temp[3];
    438 	}
    439 	/* ROUNDS-1 ordinary rounds */
    440 	for (r = ROUNDS-1; r > rounds; r--) {
    441 		*(word32 *)a[0] ^= *(word32 *)rk[r][0];
    442 		*(word32 *)a[1] ^= *(word32 *)rk[r][1];
    443 		*(word32 *)a[2] ^= *(word32 *)rk[r][2];
    444 		*(word32 *)a[3] ^= *(word32 *)rk[r][3];
    445 
    446 		*((word32*)a[0]) =
    447 			  *((const word32*)U1[a[0][0]])
    448 			^ *((const word32*)U2[a[0][1]])
    449 			^ *((const word32*)U3[a[0][2]])
    450 			^ *((const word32*)U4[a[0][3]]);
    451 
    452 		*((word32*)a[1]) =
    453 			  *((const word32*)U1[a[1][0]])
    454 			^ *((const word32*)U2[a[1][1]])
    455 			^ *((const word32*)U3[a[1][2]])
    456 			^ *((const word32*)U4[a[1][3]]);
    457 
    458 		*((word32*)a[2]) =
    459 			  *((const word32*)U1[a[2][0]])
    460 			^ *((const word32*)U2[a[2][1]])
    461 			^ *((const word32*)U3[a[2][2]])
    462 			^ *((const word32*)U4[a[2][3]]);
    463 
    464 		*((word32*)a[3]) =
    465 			  *((const word32*)U1[a[3][0]])
    466 			^ *((const word32*)U2[a[3][1]])
    467 			^ *((const word32*)U3[a[3][2]])
    468 			^ *((const word32*)U4[a[3][3]]);
    469 		for (i = 0; i < 4; i++) {
    470 			a[i][0] = Si[a[i][0]];
    471 			a[i][1] = Si[a[i][1]];
    472 			a[i][2] = Si[a[i][2]];
    473 			a[i][3] = Si[a[i][3]];
    474 		}
    475 		for (i = 1; i < 4; i++) {
    476 			shift = (4 - i) & 3;
    477 			temp[0] = a[(0 + shift) & 3][i];
    478 			temp[1] = a[(1 + shift) & 3][i];
    479 			temp[2] = a[(2 + shift) & 3][i];
    480 			temp[3] = a[(3 + shift) & 3][i];
    481 			a[0][i] = temp[0];
    482 			a[1][i] = temp[1];
    483 			a[2][i] = temp[2];
    484 			a[3][i] = temp[3];
    485 		}
    486 	}
    487 	if (rounds == 0) {
    488 		/* End with the extra key addition */
    489 		*(word32 *)a[0] ^= *(word32 *)rk[0][0];
    490 		*(word32 *)a[1] ^= *(word32 *)rk[0][1];
    491 		*(word32 *)a[2] ^= *(word32 *)rk[0][2];
    492 		*(word32 *)a[3] ^= *(word32 *)rk[0][3];
    493 	}
    494 	return 0;
    495 }
    496 #endif /* INTERMEDIATE_VALUE_KAT */
    497