Lines Matching full:rc4
12 # This is RC4+MD5 "stitch" implementation. The idea, as spelled in
16 # processor resources better and achieve better performance. RC4
17 # instruction sequence is virtually identical to rc4-x86_64.pl, which
20 # minimize register usage, which was used as "main thread" with RC4
21 # weaved into it, one RC4 round per one MD5 round. In addition to the
23 # md5_block_asm_data_order and RC4. Below are performance numbers in
27 # RC4 MD5 RC4+MD5 stitch gain
34 # (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
37 my ($rc4,$md5)=(1,1); # what to generate?
38 my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(),
40 # to be able to use 'openssl speed rc4' for
58 if ($rc4 && !$md5) {
60 $func="RC4"; $nargs=4;
61 } elsif ($md5 && !$rc4) {
69 # const void *in0, # RC4 input
70 # void *out, # RC4 output
99 my @XX=("%rbp","%rsi"); # RC4 registers
124 if ($rc4) {
192 #rc4# add $TX[0]#b,$YY#b
193 #rc4# lea ($dat,$XX[0],4),$XX[1]
220 $code.=" movdqu ($in0),%xmm2\n" if ($rc4 && $j==15);
221 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
222 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
224 #rc4# movl ($dat,$YY,4),$TY#d
226 #rc4# movl $TX[0]#d,($dat,$YY,4)
229 #rc4# add $TY#b,$TX[0]#b
230 #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
233 #rc4# movz $TX[0]#b,$TX[0]#d
234 #rc4# movl $TY#d,4*$k($XX[1])
236 #rc4# add $TX[1]#b,$YY#b
239 #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
242 $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
248 $code.=<<___ if ($rc4 && $j==15);
260 $code.=" movdqu 16($in0),%xmm3\n" if ($rc4 && $j==15);
261 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
262 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
264 #rc4# movl ($dat,$YY,4),$TY#d
266 #rc4# movl $TX[0]#d,($dat,$YY,4)
269 #rc4# add $TY#b,$TX[0]#b
270 #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
273 #rc4# movz $TX[0]#b,$TX[0]#d
274 #rc4# movl $TY#d,4*$k($XX[1])
276 #rc4# add $TX[1]#b,$YY#b
279 #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
282 $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
288 $code.=<<___ if ($rc4 && $j==15);
300 $code.=" movdqu 32($in0),%xmm4\n" if ($rc4 && $j==15);
301 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
302 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
304 #rc4# movl ($dat,$YY,4),$TY#d
306 #rc4# movl $TX[0]#d,($dat,$YY,4)
309 #rc4# add $TY#b,$TX[0]#b
310 #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
312 #rc4# movz $TX[0]#b,$TX[0]#d
314 #rc4# movl $TY#d,4*$k($XX[1])
315 #rc4# add $TX[1]#b,$YY#b
318 #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
321 $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
327 $code.=<<___ if ($rc4 && $j==15);
339 $code.=" movdqu 48($in0),%xmm5\n" if ($rc4 && $j==15);
340 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
341 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
343 #rc4# movl ($dat,$YY,4),$TY#d
345 #rc4# movl $TX[0]#d,($dat,$YY,4)
348 #rc4# add $TY#b,$TX[0]#b
349 #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
351 #rc4# movz $TX[0]#b,$TX[0]#d
353 #rc4# movl $TY#d,4*$k($XX[1])
355 #rc4# add $TX[1]#b,$YY#b
358 #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
361 $code.=<<___ if ($rc4 && $j==15);
387 #rc4# movdqu %xmm2,($out,$in0) # write RC4 output
388 #rc4# movdqu %xmm3,16($out,$in0)
389 #rc4# movdqu %xmm4,32($out,$in0)
390 #rc4# movdqu %xmm5,48($out,$in0)
392 #rc4# lea 64($in0),$in0
397 #rc4# sub $TX[0]#b,$YY#b # correct $YY
403 $code.=<<___ if ($rc4 && (!$md5 || $D));
428 #rc4# sub \$1,$XX[0]#b
429 #rc4# movl $XX[0]#d,-8($dat)
430 #rc4# movl $YY#d,-4($dat)
445 if ($rc4 && $D) { # sole purpose of this section is to provide
447 # replacement for rc4-x86_64.pl for debugging
502 .asciz "rc4(64x,int)"
627 $code =~ s/#rc4#//gm if ($rc4);