Home | History | Annotate | Download | only in rc4
      1 /* crypto/rc4/rc4_enc.c */
      2 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      3  * All rights reserved.
      4  *
      5  * This package is an SSL implementation written
      6  * by Eric Young (eay (at) cryptsoft.com).
      7  * The implementation was written so as to conform with Netscapes SSL.
      8  *
      9  * This library is free for commercial and non-commercial use as long as
     10  * the following conditions are aheared to.  The following conditions
     11  * apply to all code found in this distribution, be it the RC4, RSA,
     12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     13  * included with this distribution is covered by the same copyright terms
     14  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     15  *
     16  * Copyright remains Eric Young's, and as such any Copyright notices in
     17  * the code are not to be removed.
     18  * If this package is used in a product, Eric Young should be given attribution
     19  * as the author of the parts of the library used.
     20  * This can be in the form of a textual message at program startup or
     21  * in documentation (online or textual) provided with the package.
     22  *
     23  * Redistribution and use in source and binary forms, with or without
     24  * modification, are permitted provided that the following conditions
     25  * are met:
     26  * 1. Redistributions of source code must retain the copyright
     27  *    notice, this list of conditions and the following disclaimer.
     28  * 2. Redistributions in binary form must reproduce the above copyright
     29  *    notice, this list of conditions and the following disclaimer in the
     30  *    documentation and/or other materials provided with the distribution.
     31  * 3. All advertising materials mentioning features or use of this software
     32  *    must display the following acknowledgement:
     33  *    "This product includes cryptographic software written by
     34  *     Eric Young (eay (at) cryptsoft.com)"
     35  *    The word 'cryptographic' can be left out if the rouines from the library
     36  *    being used are not cryptographic related :-).
     37  * 4. If you include any Windows specific code (or a derivative thereof) from
     38  *    the apps directory (application code) you must include an acknowledgement:
     39  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     40  *
     41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     51  * SUCH DAMAGE.
     52  *
     53  * The licence and distribution terms for any publically available version or
     54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     55  * copied and put under another distribution licence
     56  * [including the GNU Public Licence.]
     57  */
     58 
     59 #include <openssl/rc4.h>
     60 #include "rc4_locl.h"
     61 
     62 /* RC4 as implemented from a posting from
     63  * Newsgroups: sci.crypt
     64  * From: sterndark (at) netcom.com (David Sterndark)
     65  * Subject: RC4 Algorithm revealed.
     66  * Message-ID: <sternCvKL4B.Hyy (at) netcom.com>
     67  * Date: Wed, 14 Sep 1994 06:35:31 GMT
     68  */
     69 
     70 void RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
     71 	     unsigned char *outdata)
     72 	{
     73         register RC4_INT *d;
     74         register RC4_INT x,y,tx,ty;
     75 	size_t i;
     76 
     77         x=key->x;
     78         y=key->y;
     79         d=key->data;
     80 
     81 #if defined(RC4_CHUNK)
     82 	/*
     83 	 * The original reason for implementing this(*) was the fact that
     84 	 * pre-21164a Alpha CPUs don't have byte load/store instructions
     85 	 * and e.g. a byte store has to be done with 64-bit load, shift,
     86 	 * and, or and finally 64-bit store. Peaking data and operating
     87 	 * at natural word size made it possible to reduce amount of
     88 	 * instructions as well as to perform early read-ahead without
     89 	 * suffering from RAW (read-after-write) hazard. This resulted
     90 	 * in ~40%(**) performance improvement on 21064 box with gcc.
     91 	 * But it's not only Alpha users who win here:-) Thanks to the
     92 	 * early-n-wide read-ahead this implementation also exhibits
     93 	 * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
     94 	 * on sizeof(RC4_INT)).
     95 	 *
     96 	 * (*)	"this" means code which recognizes the case when input
     97 	 *	and output pointers appear to be aligned at natural CPU
     98 	 *	word boundary
     99 	 * (**)	i.e. according to 'apps/openssl speed rc4' benchmark,
    100 	 *	crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
    101 	 *
    102 	 * Cavets.
    103 	 *
    104 	 * - RC4_CHUNK="unsigned long long" should be a #1 choice for
    105 	 *   UltraSPARC. Unfortunately gcc generates very slow code
    106 	 *   (2.5-3 times slower than one generated by Sun's WorkShop
    107 	 *   C) and therefore gcc (at least 2.95 and earlier) should
    108 	 *   always be told that RC4_CHUNK="unsigned long".
    109 	 *
    110 	 *					<appro (at) fy.chalmers.se>
    111 	 */
    112 
    113 # define RC4_STEP	( \
    114 			x=(x+1) &0xff,	\
    115 			tx=d[x],	\
    116 			y=(tx+y)&0xff,	\
    117 			ty=d[y],	\
    118 			d[y]=tx,	\
    119 			d[x]=ty,	\
    120 			(RC4_CHUNK)d[(tx+ty)&0xff]\
    121 			)
    122 
    123 	if ( ( ((size_t)indata  & (sizeof(RC4_CHUNK)-1)) |
    124 	       ((size_t)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 )
    125 		{
    126 		RC4_CHUNK ichunk,otp;
    127 		const union { long one; char little; } is_endian = {1};
    128 
    129 		/*
    130 		 * I reckon we can afford to implement both endian
    131 		 * cases and to decide which way to take at run-time
    132 		 * because the machine code appears to be very compact
    133 		 * and redundant 1-2KB is perfectly tolerable (i.e.
    134 		 * in case the compiler fails to eliminate it:-). By
    135 		 * suggestion from Terrel Larson <terr (at) terralogic.net>
    136 		 * who also stands for the is_endian union:-)
    137 		 *
    138 		 * Special notes.
    139 		 *
    140 		 * - is_endian is declared automatic as doing otherwise
    141 		 *   (declaring static) prevents gcc from eliminating
    142 		 *   the redundant code;
    143 		 * - compilers (those I've tried) don't seem to have
    144 		 *   problems eliminating either the operators guarded
    145 		 *   by "if (sizeof(RC4_CHUNK)==8)" or the condition
    146 		 *   expressions themselves so I've got 'em to replace
    147 		 *   corresponding #ifdefs from the previous version;
    148 		 * - I chose to let the redundant switch cases when
    149 		 *   sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
    150 		 *   before);
    151 		 * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
    152 		 *   [LB]ESHFT guards against "shift is out of range"
    153 		 *   warnings when sizeof(RC4_CHUNK)!=8
    154 		 *
    155 		 *			<appro (at) fy.chalmers.se>
    156 		 */
    157 		if (!is_endian.little)
    158 			{	/* BIG-ENDIAN CASE */
    159 # define BESHFT(c)	(((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
    160 			for (;len&(0-sizeof(RC4_CHUNK));len-=sizeof(RC4_CHUNK))
    161 				{
    162 				ichunk  = *(RC4_CHUNK *)indata;
    163 				otp  = RC4_STEP<<BESHFT(0);
    164 				otp |= RC4_STEP<<BESHFT(1);
    165 				otp |= RC4_STEP<<BESHFT(2);
    166 				otp |= RC4_STEP<<BESHFT(3);
    167 				if (sizeof(RC4_CHUNK)==8)
    168 					{
    169 					otp |= RC4_STEP<<BESHFT(4);
    170 					otp |= RC4_STEP<<BESHFT(5);
    171 					otp |= RC4_STEP<<BESHFT(6);
    172 					otp |= RC4_STEP<<BESHFT(7);
    173 					}
    174 				*(RC4_CHUNK *)outdata = otp^ichunk;
    175 				indata  += sizeof(RC4_CHUNK);
    176 				outdata += sizeof(RC4_CHUNK);
    177 				}
    178 			if (len)
    179 				{
    180 				RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
    181 
    182 				ichunk = *(RC4_CHUNK *)indata;
    183 				ochunk = *(RC4_CHUNK *)outdata;
    184 				otp = 0;
    185 				i = BESHFT(0);
    186 				mask <<= (sizeof(RC4_CHUNK)-len)<<3;
    187 				switch (len&(sizeof(RC4_CHUNK)-1))
    188 					{
    189 					case 7:	otp  = RC4_STEP<<i, i-=8;
    190 					case 6:	otp |= RC4_STEP<<i, i-=8;
    191 					case 5:	otp |= RC4_STEP<<i, i-=8;
    192 					case 4:	otp |= RC4_STEP<<i, i-=8;
    193 					case 3:	otp |= RC4_STEP<<i, i-=8;
    194 					case 2:	otp |= RC4_STEP<<i, i-=8;
    195 					case 1:	otp |= RC4_STEP<<i, i-=8;
    196 					case 0: ; /*
    197 						   * it's never the case,
    198 						   * but it has to be here
    199 						   * for ultrix?
    200 						   */
    201 					}
    202 				ochunk &= ~mask;
    203 				ochunk |= (otp^ichunk) & mask;
    204 				*(RC4_CHUNK *)outdata = ochunk;
    205 				}
    206 			key->x=x;
    207 			key->y=y;
    208 			return;
    209 			}
    210 		else
    211 			{	/* LITTLE-ENDIAN CASE */
    212 # define LESHFT(c)	(((c)*8)&(sizeof(RC4_CHUNK)*8-1))
    213 			for (;len&(0-sizeof(RC4_CHUNK));len-=sizeof(RC4_CHUNK))
    214 				{
    215 				ichunk  = *(RC4_CHUNK *)indata;
    216 				otp  = RC4_STEP;
    217 				otp |= RC4_STEP<<8;
    218 				otp |= RC4_STEP<<16;
    219 				otp |= RC4_STEP<<24;
    220 				if (sizeof(RC4_CHUNK)==8)
    221 					{
    222 					otp |= RC4_STEP<<LESHFT(4);
    223 					otp |= RC4_STEP<<LESHFT(5);
    224 					otp |= RC4_STEP<<LESHFT(6);
    225 					otp |= RC4_STEP<<LESHFT(7);
    226 					}
    227 				*(RC4_CHUNK *)outdata = otp^ichunk;
    228 				indata  += sizeof(RC4_CHUNK);
    229 				outdata += sizeof(RC4_CHUNK);
    230 				}
    231 			if (len)
    232 				{
    233 				RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
    234 
    235 				ichunk = *(RC4_CHUNK *)indata;
    236 				ochunk = *(RC4_CHUNK *)outdata;
    237 				otp = 0;
    238 				i   = 0;
    239 				mask >>= (sizeof(RC4_CHUNK)-len)<<3;
    240 				switch (len&(sizeof(RC4_CHUNK)-1))
    241 					{
    242 					case 7:	otp  = RC4_STEP,    i+=8;
    243 					case 6:	otp |= RC4_STEP<<i, i+=8;
    244 					case 5:	otp |= RC4_STEP<<i, i+=8;
    245 					case 4:	otp |= RC4_STEP<<i, i+=8;
    246 					case 3:	otp |= RC4_STEP<<i, i+=8;
    247 					case 2:	otp |= RC4_STEP<<i, i+=8;
    248 					case 1:	otp |= RC4_STEP<<i, i+=8;
    249 					case 0: ; /*
    250 						   * it's never the case,
    251 						   * but it has to be here
    252 						   * for ultrix?
    253 						   */
    254 					}
    255 				ochunk &= ~mask;
    256 				ochunk |= (otp^ichunk) & mask;
    257 				*(RC4_CHUNK *)outdata = ochunk;
    258 				}
    259 			key->x=x;
    260 			key->y=y;
    261 			return;
    262 			}
    263 		}
    264 #endif
    265 #define LOOP(in,out) \
    266 		x=((x+1)&0xff); \
    267 		tx=d[x]; \
    268 		y=(tx+y)&0xff; \
    269 		d[x]=ty=d[y]; \
    270 		d[y]=tx; \
    271 		(out) = d[(tx+ty)&0xff]^ (in);
    272 
    273 #ifndef RC4_INDEX
    274 #define RC4_LOOP(a,b,i)	LOOP(*((a)++),*((b)++))
    275 #else
    276 #define RC4_LOOP(a,b,i)	LOOP(a[i],b[i])
    277 #endif
    278 
    279 	i=len>>3;
    280 	if (i)
    281 		{
    282 		for (;;)
    283 			{
    284 			RC4_LOOP(indata,outdata,0);
    285 			RC4_LOOP(indata,outdata,1);
    286 			RC4_LOOP(indata,outdata,2);
    287 			RC4_LOOP(indata,outdata,3);
    288 			RC4_LOOP(indata,outdata,4);
    289 			RC4_LOOP(indata,outdata,5);
    290 			RC4_LOOP(indata,outdata,6);
    291 			RC4_LOOP(indata,outdata,7);
    292 #ifdef RC4_INDEX
    293 			indata+=8;
    294 			outdata+=8;
    295 #endif
    296 			if (--i == 0) break;
    297 			}
    298 		}
    299 	i=len&0x07;
    300 	if (i)
    301 		{
    302 		for (;;)
    303 			{
    304 			RC4_LOOP(indata,outdata,0); if (--i == 0) break;
    305 			RC4_LOOP(indata,outdata,1); if (--i == 0) break;
    306 			RC4_LOOP(indata,outdata,2); if (--i == 0) break;
    307 			RC4_LOOP(indata,outdata,3); if (--i == 0) break;
    308 			RC4_LOOP(indata,outdata,4); if (--i == 0) break;
    309 			RC4_LOOP(indata,outdata,5); if (--i == 0) break;
    310 			RC4_LOOP(indata,outdata,6); if (--i == 0) break;
    311 			}
    312 		}
    313 	key->x=x;
    314 	key->y=y;
    315 	}
    316