; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2011-06-28 : Igor Pavlov : Public domain
;
; Syntax : MASM style (Intel operand order).
; Exports: XzCrc64UpdateT4, built either for x64 or for 32-bit x86
;          depending on whether the symbol x64 is defined.
;
; The register aliases r0..r7, x0..x7, x0_L, x0_H, x3_L, the constant
; REG_SIZE and the MY_* helper macros are not defined in this file; they
; are expected to come from 7zAsm.asm.
;
; Overall shape of the routine (both builds):
;   1. byte-at-a-time until the source pointer is a multiple of 4,
;   2. a loop that folds 4 source bytes per iteration through four
;      lookup tables,
;   3. byte-at-a-time for whatever is left.

include 7zAsm.asm

MY_ASM_START

; Distance in bytes between two consecutive lookup tables
; (0800h = 256 entries * 8 bytes per entry).
CRC64_TABLE_BYTES equ 0800h

ifdef x64

; ---------------------------------------------------------------------
; 64-bit build
; ---------------------------------------------------------------------
; Register roles:
;   r0  running CRC value (also where the result is left)
;   r1  CRC value on entry; inside the loop it receives the next
;       source word
;   r2  source pointer on entry
;   r5  base address of the lookup tables
;   rD  source pointer in the byte phases, negative offset in the loop
;   rN  remaining byte count in the byte phases, limit pointer in the loop
;
; NOTE(review): taking the four arguments from r1, r2, r8, r9 matches the
; Microsoft x64 convention if r1 = rcx and r2 = rdx -- confirm against
; 7zAsm.asm.

    rD          equ r9
    rN          equ r10

; num_VAR holds the byte count on entry and is later reused to keep the
; end-of-buffer pointer.  table_VAR is the same register as rD, so the
; table pointer has to be copied out before rD is loaded.
    num_VAR     equ r8
    table_VAR   equ r9

; Address expression of the current source word inside the loop.
    SRCDAT      equ rN + rD

; CRC_XOR acc, idx, tbl
;   acc = acc xor (64-bit entry idx of lookup table number tbl)
CRC_XOR macro acc:req, idx:req, tbl:req
    xor     acc, QWORD PTR [r5 + idx * 8 + CRC64_TABLE_BYTES * tbl]
endm

; CRC1b
;   Folds one source byte into the CRC:
;     r0 = (r0 shr 8) xor table0[(byte at rD) xor (low byte of r0)]
;   rD is advanced by one and rN is decremented.  The trailing dec is the
;   last flag-setting instruction, so ZF is set on exit exactly when rN
;   has reached zero.  x3 and x6 are overwritten.
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    shr     r0, 8
    CRC_XOR r0, r6, 0
    dec     rN
endm

; MY_PROLOG tail_label
;   - saves registers via MY_PUSH_4_REGS and moves the arguments into
;     their working registers;
;   - jumps to tail_label if the byte count is zero;
;   - consumes single bytes until rD is a multiple of 4 or the count
;     runs out;
;   - jumps to tail_label if fewer than 8 bytes remain;
;   - otherwise prepares the 4-byte loop:
;       num_VAR = end pointer
;       limit   = (end - 4) AND NOT 3
;       rD      = start - limit   (negative, counts up to zero)
;       rN      = limit + 4       (so SRCDAT addresses the NEXT word)
;     and xors the first source word into the low 32 bits of r0.
MY_PROLOG macro tail_label:req
    MY_PUSH_4_REGS

    mov     r0, r1
    mov     rN, num_VAR
    ; read the table pointer before rD replaces it
    mov     r5, table_VAR
    mov     rD, r2
    test    rN, rN
    jz      tail_label
  @@:
    test    rD, 3
    jz      @F
    CRC1b
    ; ZF comes from the dec at the end of CRC1b
    jnz     @B
  @@:
    cmp     rN, 8
    jb      tail_label
    add     rN, rD
    mov     num_VAR, rN
    sub     rN, 4
    and     rN, NOT 3
    sub     rD, rN
    ; here SRCDAT is still the start of the aligned data
    mov     x1, [SRCDAT]
    xor     r0, r1
    add     rN, 4
endm

; MY_EPILOG tail_label
;   Entered from the loop with rD = 0.  Removes the source word that was
;   xored into r0 ahead of time but never consumed, turns rD back into a
;   pointer and rN back into the remaining byte count, then (from
;   tail_label on) consumes the remaining bytes one at a time and
;   restores registers via MY_POP_4_REGS.
MY_EPILOG macro tail_label:req
    sub     rN, 4
    mov     x1, [SRCDAT]
    xor     r0, r1
    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD
  tail_label:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     tail_label
  @@:
    MY_POP_4_REGS
endm

; XzCrc64UpdateT4 (x64, 4 arguments)
;   In : r1 = CRC value, r2 = source pointer,
;        r8 = byte count, r9 = lookup table pointer
;   Out: r0 = updated CRC value
;
;   Loop invariant: the low 32 bits of r0 already contain the current
;   source word xored in.  Each iteration loads the following word into
;   r1, splits the low 32 bits of r0 into four table indexes, and
;   combines the four table entries with the shifted CRC and the
;   preloaded word.
MY_PROC XzCrc64UpdateT4, 4
    MY_PROLOG crc64_tail_4
    align 16
  crc64_loop_4:
    mov     x1, [SRCDAT]
    movzx   x2, x0_L
    movzx   x3, x0_H
    shr     r0, 16
    movzx   x6, x0_L
    movzx   x7, x0_H
    shr     r0, 16
    CRC_XOR r1, r2, 3
    CRC_XOR r0, r3, 2
    CRC_XOR r1, r6, 1
    CRC_XOR r0, r7, 0
    xor     r0, r1

    ; rD is a negative offset; the loop ends when it reaches zero
    add     rD, 4
    jnz     crc64_loop_4

    MY_EPILOG crc64_tail_4
MY_ENDP

else

; ---------------------------------------------------------------------
; 32-bit build
; ---------------------------------------------------------------------
; Register roles:
;   r2:r0  running CRC value (r2 = high 32 bits, r0 = low 32 bits)
;   r1     source pointer on entry (rD is the same register)
;   r2     byte count on entry (copied to rN before r2 is reloaded)
;   r3     accumulator for the new low half inside the loop
;   r5     base address of the lookup tables
;   r6     table index scratch
;   rD     source pointer in the byte phases, negative offset in the loop
;   rN     remaining byte count in the byte phases, limit pointer in the loop

    rD          equ r1
    rN          equ r7

; Offsets from r4 to the stack arguments after MY_PUSH_4_REGS.
; NOTE(review): REG_SIZE * 5 presumably skips four saved registers plus
; the return address -- confirm against 7zAsm.asm.
    crc_val     equ (REG_SIZE * 5)
    crc_table   equ (8 + crc_val)
    table_VAR   equ [r4 + crc_table]
; The table-pointer stack slot is reused to keep the end-of-buffer
; pointer once the table pointer has been loaded into r5.
    num_VAR     equ table_VAR


; Address expression of the current source word inside the loop.
    SRCDAT      equ rN + rD

; CRC opLo, opHi, dstLo, dstHi, idx, tbl
;   Applies opLo to dstLo with the low dword, and opHi to dstHi with the
;   high dword, of entry idx of lookup table number tbl.
CRC macro opLo:req, opHi:req, dstLo:req, dstHi:req, idx:req, tbl:req
    opLo    dstLo, DWORD PTR [r5 + idx * 8 + CRC64_TABLE_BYTES * tbl]
    opHi    dstHi, DWORD PTR [r5 + idx * 8 + CRC64_TABLE_BYTES * tbl + 4]
endm

; CRC_XOR dstLo, dstHi, idx, tbl
;   dstHi:dstLo = dstHi:dstLo xor (64-bit entry idx of table number tbl)
CRC_XOR macro dstLo:req, dstHi:req, idx:req, tbl:req
    CRC xor, xor, dstLo, dstHi, idx, tbl
endm


; CRC1b
;   Folds one source byte into the 64-bit CRC held in r2:r0:
;     index = (byte at rD) xor (low byte of r0)
;     r2:r0 = (r2:r0 shr 8) xor table0[index]
;   rD is advanced by one and rN is decremented.  The trailing dec is the
;   last flag-setting instruction, so ZF is set on exit exactly when rN
;   has reached zero.  x3 and x6 are overwritten.
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    ; 64-bit right shift by 8 of the pair r2:r0
    shrd    r0, r2, 8
    shr     r2, 8
    CRC_XOR r0, r2, r6, 0
    dec     rN
endm

; MY_PROLOG tail_label
;   - saves registers via MY_PUSH_4_REGS, copies the byte count from r2
;     to rN, loads the CRC halves from the stack into r0 / r2 and the
;     table pointer into r5;
;   - jumps to tail_label if the byte count is zero;
;   - consumes single bytes until rD is a multiple of 4 or the count
;     runs out;
;   - jumps to tail_label if fewer than 8 bytes remain;
;   - otherwise prepares the 4-byte loop:
;       num_VAR = end pointer (overwrites the table-pointer stack slot)
;       limit   = (end - 4) AND NOT 3
;       rD      = start - limit   (negative, counts up to zero)
;       rN      = limit + 4       (so SRCDAT addresses the NEXT word)
;     and xors the first source word into r0.
MY_PROLOG macro tail_label:req
    MY_PUSH_4_REGS

    ; keep the byte count before r2 is reloaded with the CRC high half
    mov     rN, r2

    mov     x0, [r4 + crc_val]
    mov     x2, [r4 + crc_val + 4]
    mov     r5, table_VAR
    test    rN, rN
    jz      tail_label
  @@:
    test    rD, 3
    jz      @F
    CRC1b
    ; ZF comes from the dec at the end of CRC1b
    jnz     @B
  @@:
    cmp     rN, 8
    jb      tail_label
    add     rN, rD

    mov     num_VAR, rN

    sub     rN, 4
    and     rN, NOT 3
    sub     rD, rN
    ; here SRCDAT is still the start of the aligned data
    xor     r0, [SRCDAT]
    add     rN, 4
endm

; MY_EPILOG tail_label
;   Entered from the loop with rD = 0.  Removes the source word that was
;   xored into r0 ahead of time but never consumed, turns rD back into a
;   pointer and rN back into the remaining byte count, then (from
;   tail_label on) consumes the remaining bytes one at a time and
;   restores registers via MY_POP_4_REGS.
MY_EPILOG macro tail_label:req
    sub     rN, 4
    xor     r0, [SRCDAT]

    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD
  tail_label:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     tail_label
  @@:
    MY_POP_4_REGS
endm

; XzCrc64UpdateT4 (x86, 5 argument slots)
;   In : r1 = source pointer, r2 = byte count,
;        [r4 + crc_val]   = 64-bit CRC value (after MY_PUSH_4_REGS),
;        [r4 + crc_table] = lookup table pointer
;   Out: r2:r0 = updated CRC value (r2 = high half, r0 = low half)
;
;   Loop invariant: r0 already contains the current source word xored
;   in, and x6 already holds the low byte of r0.  Each iteration builds
;   the new value in r2:r3, starting from the next source word xored
;   with the old high half, and finishes by moving r3 into r0.
MY_PROC XzCrc64UpdateT4, 5
    MY_PROLOG crc64_tail_4
    ; first table index for the first iteration
    movzx   x6, x0_L
    align 16
  crc64_loop_4:
    mov     r3, [SRCDAT]
    xor     r3, r2

    ; the mov replaces the old high half, which is already folded into r3
    CRC xor, mov, r3, r2, r6, 3
    movzx   x6, x0_H
    shr     r0, 16
    CRC_XOR r3, r2, r6, 2

    movzx   x6, x0_L
    movzx   x0, x0_H
    CRC_XOR r3, r2, r6, 1
    CRC_XOR r3, r2, r0, 0
    ; first table index for the next iteration
    movzx   x6, x3_L
    mov     r0, r3

    ; rD is a negative offset; the loop ends when it reaches zero
    add     rD, 4
    jnz     crc64_loop_4

    MY_EPILOG crc64_tail_4
MY_ENDP

endif

end