#include "arm_arch.h"

.text
.arch	armv8-a+crypto
.global	gcm_init_v8
.type	gcm_init_v8,%function
.align	4
gcm_init_v8:
	ld1	{v17.2d},[x1]		//load H
	movi	v16.16b,#0xe1
	ext	v3.16b,v17.16b,v17.16b,#8
	shl	v16.2d,v16.2d,#57
	ushr	v18.2d,v16.2d,#63
	ext	v16.16b,v18.16b,v16.16b,#8	//t0=0xc2....01
	dup	v17.4s,v17.s[1]
	ushr	v19.2d,v3.2d,#63
	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
	and	v19.16b,v19.16b,v16.16b
	shl	v3.2d,v3.2d,#1
	ext	v19.16b,v19.16b,v19.16b,#8
	and	v16.16b,v16.16b,v17.16b
	orr	v3.16b,v3.16b,v19.16b		//H<<<=1
	eor	v3.16b,v3.16b,v16.16b		//twisted H
	st1	{v3.2d},[x0]

	ret
.size	gcm_init_v8,.-gcm_init_v8

.global	gcm_gmult_v8
.type	gcm_gmult_v8,%function
.align	4
gcm_gmult_v8:
	ld1	{v17.2d},[x0]		//load Xi
	movi	v19.16b,#0xe1
	ld1	{v20.2d},[x1]		//load twisted H
	shl	v19.2d,v19.2d,#57
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	ext	v21.16b,v20.16b,v20.16b,#8
	mov	x3,#0
	ext	v3.16b,v17.16b,v17.16b,#8
	mov	x12,#0
	eor	v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	mov	x2,x0
	b	.Lgmult_v8
.size	gcm_gmult_v8,.-gcm_gmult_v8

.global	gcm_ghash_v8
.type	gcm_ghash_v8,%function
.align	4
gcm_ghash_v8:
	ld1	{v0.2d},[x0]		//load [rotated] Xi
	subs	x3,x3,#16
	movi	v19.16b,#0xe1
	mov	x12,#16
	ld1	{v20.2d},[x1]		//load twisted H
	csel	x12,xzr,x12,eq
	ext	v0.16b,v0.16b,v0.16b,#8
	shl	v19.2d,v19.2d,#57
	ld1	{v17.2d},[x2],x12	//load [rotated] inp
	ext	v21.16b,v20.16b,v20.16b,#8
#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
	rev64	v17.16b,v17.16b
#endif
	eor	v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	ext	v3.16b,v17.16b,v17.16b,#8
	b	.Loop_v8

.align	4
.Loop_v8:
	ext	v18.16b,v0.16b,v0.16b,#8
	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
	eor	v17.16b,v17.16b,v18.16b		//v17.16b is rotated inp^Xi

.Lgmult_v8:
	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
	subs	x3,x3,#16
	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
	csel	x12,xzr,x12,eq

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	ld1	{v17.2d},[x2],x12	//load [rotated] inp
	eor	v1.16b,v1.16b,v18.16b
	pmull	v18.1q,v0.1d,v19.1d		//1st phase

	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	eor	v0.16b,v1.16b,v18.16b
	ext	v3.16b,v17.16b,v17.16b,#8

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b
	b.hs	.Loop_v8

#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
#endif
	ext	v0.16b,v0.16b,v0.16b,#8
	st1	{v0.2d},[x0]		//write out Xi

	ret
.size	gcm_ghash_v8,.-gcm_ghash_v8
.asciz	"GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2