1 #include "crc32c.h" 2 3 #define CRC32C3X8(ITR) \ 4 crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\ 5 crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\ 6 crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR))); 7 8 #define CRC32C7X3X8(ITR) do {\ 9 CRC32C3X8((ITR)*7+0) \ 10 CRC32C3X8((ITR)*7+1) \ 11 CRC32C3X8((ITR)*7+2) \ 12 CRC32C3X8((ITR)*7+3) \ 13 CRC32C3X8((ITR)*7+4) \ 14 CRC32C3X8((ITR)*7+5) \ 15 CRC32C3X8((ITR)*7+6) \ 16 } while(0) 17 18 #ifndef HWCAP_CRC32 19 #define HWCAP_CRC32 (1 << 7) 20 #endif /* HWCAP_CRC32 */ 21 22 int crc32c_arm64_available = 0; 23 24 #ifdef ARCH_HAVE_ARM64_CRC_CRYPTO 25 26 #include <sys/auxv.h> 27 #include <arm_acle.h> 28 #include <arm_neon.h> 29 30 static int crc32c_probed; 31 32 /* 33 * Function to calculate reflected crc with PMULL Instruction 34 * crc done "by 3" for fixed input block size of 1024 bytes 35 */ 36 uint32_t crc32c_arm64(unsigned char const *data, unsigned long length) 37 { 38 signed long len = length; 39 uint32_t crc = ~0; 40 uint32_t crc0, crc1, crc2; 41 42 /* Load two consts: K1 and K2 */ 43 const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014; 44 uint64_t t0, t1; 45 46 while ((len -= 1024) >= 0) { 47 /* Do first 8 bytes here for better pipelining */ 48 crc0 = __crc32cd(crc, *(const uint64_t *)data); 49 crc1 = 0; 50 crc2 = 0; 51 data += sizeof(uint64_t); 52 53 /* Process block inline 54 Process crc0 last to avoid dependency with above */ 55 CRC32C7X3X8(0); 56 CRC32C7X3X8(1); 57 CRC32C7X3X8(2); 58 CRC32C7X3X8(3); 59 CRC32C7X3X8(4); 60 CRC32C7X3X8(5); 61 62 data += 42*3*sizeof(uint64_t); 63 64 /* Merge crc0 and crc1 into crc2 65 crc1 multiply by K2 66 crc0 multiply by K1 */ 67 68 t1 = (uint64_t)vmull_p64(crc1, k2); 69 t0 = (uint64_t)vmull_p64(crc0, k1); 70 crc = __crc32cd(crc2, *(const uint64_t *)data); 71 crc1 = __crc32cd(0, t1); 72 crc ^= crc1; 73 crc0 = __crc32cd(0, t0); 74 crc ^= crc0; 75 76 data += sizeof(uint64_t); 77 } 78 79 if (!(len += 1024)) 80 return crc; 81 82 while ((len -= sizeof(uint64_t)) >= 0) { 83 crc = __crc32cd(crc, *(const uint64_t *)data); 84 data += sizeof(uint64_t); 85 } 86 87 /* The following is more efficient than the straight loop */ 88 if (len & sizeof(uint32_t)) { 89 crc = __crc32cw(crc, *(const uint32_t *)data); 90 data += sizeof(uint32_t); 91 } 92 if (len & sizeof(uint16_t)) { 93 crc = __crc32ch(crc, *(const uint16_t *)data); 94 data += sizeof(uint16_t); 95 } 96 if (len & sizeof(uint8_t)) { 97 crc = __crc32cb(crc, *(const uint8_t *)data); 98 } 99 100 return crc; 101 } 102 103 void crc32c_arm64_probe(void) 104 { 105 unsigned long hwcap; 106 107 if (!crc32c_probed) { 108 hwcap = getauxval(AT_HWCAP); 109 if (hwcap & HWCAP_CRC32) 110 crc32c_arm64_available = 1; 111 crc32c_probed = 1; 112 } 113 } 114 115 #endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */ 116