#include "arm_arch.h"

.text
.arch	armv8-a+crypto
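// void gcm_init_v8(u128 Htable[],const u64 H[2]);
//
// Loads the hash key H from the second argument (x1), "twists" it by
// shifting it left one bit modulo the GHASH polynomial (the 0xc2...01
// constant built below), and stores the twisted H at the first
// argument (x0) for use by gcm_gmult_v8/gcm_ghash_v8. The C prototype
// above follows the usual OpenSSL/CRYPTOGAMS convention and is shown
// for orientation only; the caller's exact table type is an assumption.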
.global	gcm_init_v8
.type	gcm_init_v8,%function
.align	4
gcm_init_v8:
	ld1		{v17.2d},[x1]		//load H
	movi		v16.16b,#0xe1
	ext		v3.16b,v17.16b,v17.16b,#8
	shl	v16.2d,v16.2d,#57
	ushr	v18.2d,v16.2d,#63
	ext		v16.16b,v18.16b,v16.16b,#8		//t0=0xc2....01
	dup		v17.4s,v17.s[1]
	ushr	v19.2d,v3.2d,#63
	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
	and		v19.16b,v19.16b,v16.16b
	shl	v3.2d,v3.2d,#1
	ext		v19.16b,v19.16b,v19.16b,#8
	and		v16.16b,v16.16b,v17.16b
	orr		v3.16b,v3.16b,v19.16b		//H<<<=1
	eor		v3.16b,v3.16b,v16.16b		//twisted H
	st1		{v3.2d},[x0]

	ret
.size	gcm_init_v8,.-gcm_init_v8

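// void gcm_gmult_v8(u64 Xi[2],const u128 Htable[]);
//
// Multiplies the current hash value Xi (at x0) by the twisted H
// (at x1) in GF(2^128) and writes the product back over Xi. It zeroes
// the remaining-length and post-increment registers and falls through
// to the shared .Lgmult_v8 code below, which therefore runs exactly
// once. The prototype above follows the usual OpenSSL convention and
// is an assumption about the caller's types.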
.global	gcm_gmult_v8
.type	gcm_gmult_v8,%function
.align	4
gcm_gmult_v8:
	ld1		{v17.2d},[x0]		//load Xi
	movi		v19.16b,#0xe1
	ld1		{v20.2d},[x1]		//load twisted H
	shl	v19.2d,v19.2d,#57
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	ext		v21.16b,v20.16b,v20.16b,#8
	mov		x3,#0			//no input left, shared code below runs once
	ext		v3.16b,v17.16b,v17.16b,#8
	mov		x12,#0			//zero post-increment for the dummy load
	eor		v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	mov		x2,x0			//dummy "inp" pointer, nothing is consumed
	b		.Lgmult_v8
.size	gcm_gmult_v8,.-gcm_gmult_v8

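// void gcm_ghash_v8(u64 Xi[2],const u128 Htable[],const u8 *inp,size_t len);
//
// Absorbs len bytes of input (x2/x3, with len assumed to be a multiple
// of 16) into the hash value Xi (x0): for every 16-byte block,
// Xi = (Xi ^ block) * H in GF(2^128), using one Karatsuba-style
// pmull/pmull2 multiplication per block followed by a two-phase
// reduction. The prototype above follows the usual OpenSSL convention
// and is an assumption about the caller's types.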
.global	gcm_ghash_v8
.type	gcm_ghash_v8,%function
.align	4
gcm_ghash_v8:
	ld1		{v0.2d},[x0]		//load [rotated] Xi
	subs		x3,x3,#16		//len-=16
	movi		v19.16b,#0xe1
	mov		x12,#16
	ld1		{v20.2d},[x1]		//load twisted H
	csel	x12,xzr,x12,eq		//zero post-increment on the last block
	ext		v0.16b,v0.16b,v0.16b,#8
	shl	v19.2d,v19.2d,#57
	ld1		{v17.2d},[x2],x12	//load [rotated] inp
	ext		v21.16b,v20.16b,v20.16b,#8
#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
	rev64	v17.16b,v17.16b
#endif
	eor		v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	ext		v3.16b,v17.16b,v17.16b,#8
	b		.Loop_v8

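// A sketch of what one iteration of the loop below computes, with lo/hi
// naming 64-bit halves purely for exposition (all sums are XORs):
//
//	Xi ^= block					// eor at the top of .Loop_v8
//	lo   = H.lo·Xi.lo				// pmull
//	hi   = H.hi·Xi.hi				// pmull2
//	mid  = (H.lo+H.hi)·(Xi.lo+Xi.hi) + lo + hi	// Karatsuba middle term
//	Xi   = reduce(hi:lo with mid folded in)		// two-phase pmull by 0xc2...01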
.align	4
.Loop_v8:
	ext		v18.16b,v0.16b,v0.16b,#8
	eor		v3.16b,v3.16b,v0.16b		//inp^=Xi
	eor		v17.16b,v17.16b,v18.16b		//v17.16b is rotated inp^Xi

.Lgmult_v8:
	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
	eor		v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
	subs		x3,x3,#16		//len-=16
	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
	csel	x12,xzr,x12,eq		//zero post-increment on the last block

	ext		v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
	eor		v18.16b,v0.16b,v2.16b
	eor		v1.16b,v1.16b,v17.16b
	 ld1	{v17.2d},[x2],x12	//load [rotated] inp
	eor		v1.16b,v1.16b,v18.16b
	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction

	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
#ifndef __ARMEB__
	 rev64	v17.16b,v17.16b
#endif
	eor		v0.16b,v1.16b,v18.16b
	 ext		v3.16b,v17.16b,v17.16b,#8

	ext		v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor		v18.16b,v18.16b,v2.16b
	eor		v0.16b,v0.16b,v18.16b
	b.hs		.Loop_v8		//more data?

#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
#endif
	ext		v0.16b,v0.16b,v0.16b,#8
	st1		{v0.2d},[x0]		//write out Xi

	ret
.size	gcm_ghash_v8,.-gcm_ghash_v8
.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align  2