Home | History | Annotate | Download | only in x86
      1 ; AesOpt.asm -- Intel's AES.
      2 ; 2009-12-12 : Igor Pavlov : Public domain
      3 
      4 include 7zAsm.asm
        ; 7zAsm.asm is not part of this listing. It is expected to provide the
        ; MY_ASM_START, MY_PROC and MY_ENDP macros, REG_SIZE, and the r0..r6 and
        ; x3 / x6 register aliases used below.
      5 
      6 MY_ASM_START
      7 
        ; Non-x64 builds must switch the assembler's XMM instruction set on.
      8 ifndef x64
      9     .xmm
     10 endif
     11 
        ; num is the operand MY_PROLOG copies into rN (the block count).
        ;   x64 build   : the r8 register.
        ;   other builds: a stack slot; the offset is applied after MY_PROLOG's
        ;                 three pushes (presumably 3 saved registers plus the
        ;                 return address -- confirm against 7zAsm.asm).
     12 ifdef x64
     13     num     equ r8
     14 else
     15     num     equ [r4 + REG_SIZE * 4]
     16 endif
     17 
        ; rD: pointer to the 16-byte block currently being processed.
        ; rN: number of 16-byte blocks still to process.
     18 rD equ r2
     19 rN equ r0
     20 
     21 MY_PROLOG macro reg:req
        ; Shared entry sequence for the three AES procedures in this file.
        ;   reg : XMM register that receives the 16 bytes stored at [r1].
        ; State after expansion:
        ;   rN = num
        ;   x6 = (dword at [r1 + 16]) * 32
        ;   r1 = r1 + 32 (the base used by LOAD_OP / LOAD_OP_W)
        ; r3, r5 and r6 are pushed; x64 builds also store xmm6 / xmm7 at
        ; [r4 + 8] and [r4 + 24]. MY_EPILOG reverses all of this.
     22     ifdef x64
            ; movdqa requires a 16-byte aligned address; this assumes r4 + 8 is
            ; aligned at procedure entry -- TODO confirm against the x64 ABI in use.
     23     movdqa  [r4 + 8], xmm6
     24     movdqa  [r4 + 8 + 16], xmm7
     25     endif
     26 
     27     push    r3
     28     push    r5
     29     push    r6
     30 
            ; num is read only after the pushes: the non-x64 definition of num
            ; is an r4-relative stack slot.
     31     mov     rN, num
     32     mov     x6, [r1 + 16]
            ; Multiply by 32: each unit covers two 16-byte round keys.
     33     shl     x6, 5
     34 
     35     movdqa  reg, [r1]
     36     add     r1, 32
     37 endm
     38 
     39 MY_EPILOG macro
        ; Shared exit sequence; the mirror image of MY_PROLOG.
        ; Pops r6, r5, r3 in reverse push order, reloads xmm6 / xmm7 from
        ; [r4 + 8] and [r4 + 24] in x64 builds, then expands MY_ENDP (defined in
        ; 7zAsm.asm; presumably emits the return and closes the procedure).
     40     pop     r6
     41     pop     r5
     42     pop     r3
     43 
            ; After the pops r4 is back at its entry value, so these are the
            ; same addresses MY_PROLOG stored to.
     44     ifdef x64
     45     movdqa  xmm6, [r4 + 8]
     46     movdqa  xmm7, [r4 + 8 + 16]
     47     endif
     48 
     49     MY_ENDP
     50 endm
     51 
     52 ways equ 4
        ; ways   : number of blocks handled per iteration of the wide loops.
        ; ways16 : the same amount expressed in bytes (16 bytes per block).
     53 ways16 equ (ways * 16)
     54 
        ; OP_W op, op2
        ;   Expands "op xmmI, op2" for I = 0 .. ways-1 (xmm0 .. xmm3).
        ;   op may be an instruction or a two-argument macro.
        ;   op2 is substituted as text, so a caller may mention the counter i in
        ;   it (for example "i * 16") and get a different value per expansion.
        ;   The assembly-time variable i is left equal to ways afterwards.
     55 OP_W macro op, op2
     56     i = 0
     57     rept ways
     58     op @CatStr(xmm,%i), op2
     59     i = i + 1
     60     endm
     61 endm
     62 
     63 LOAD_OP macro op:req, offs:req
        ; Single-block key step: applies op to xmm0 with the 16 bytes at
        ; [r1 + r3 offs] as the source operand.
        ;   op   : instruction mnemonic (movdqa, pxor, aesenc, aesdec, ...).
        ;   offs : signed displacement written with its sign, e.g. +16 or -16.
     64     op      xmm0, [r1 + r3 offs]
     65 endm
     66   
     67 LOAD_OP_W macro op:req, offs:req
        ; Wide key step: loads the 16 bytes at [r1 + r3 offs] into xmm7 once and
        ; applies op to each of xmm0 .. xmm(ways-1) with xmm7 as the source.
        ; Overwrites xmm7.
     68     movdqa  xmm7, [r1 + r3 offs]
     69     OP_W    op, xmm7
     70 endm
     71 
     72 
     73 ; ---------- AES-CBC Decode ----------
     74 
     75 CBC_DEC_UPDATE macro reg, offs
        ; Finishes one CBC-decrypted block in place.
        ;   reg  : XMM register holding the block after the AES rounds.
        ;   offs : byte offset of that block from rD.
        ; xmm6 carries the previous ciphertext block (the chaining value).
        ; The ciphertext at [rD + offs] is copied into xmm6 before the plaintext
        ; overwrites it, so it can chain into the following block.
     76     pxor    reg, xmm6
     77     movdqa  xmm6, [rD + offs]
     78     movdqa  [rD + offs], reg
     79 endm
     80 
     81 DECODE macro op:req
        ; AES decryption rounds, walking the key schedule downwards.
        ;   op : LOAD_OP (one block in xmm0) or LOAD_OP_W (ways blocks).
        ; On entry x3 is the byte offset of the current key pair from r1. The
        ; loop ends only when x3 reaches exactly 0, so x3 is expected to be a
        ; positive multiple of 32. On exit x3 is 0 and aesdeclast has used the
        ; key at [r1].
     82     op      aesdec, +16
     83   @@:
     84     op      aesdec, +0
     85     op      aesdec, -16
            ; Step down by two round keys per iteration.
     86     sub     x3, 32
     87     jnz     @B
     88     op      aesdeclast, +0
     89 endm
     90 
     91 MY_PROC AesCbc_Decode_Intel, 3
            ; AES-CBC decryption, in place, of rN 16-byte blocks starting at rD.
            ; On entry r1 points to a state area laid out as:
            ;   [r1]       16-byte chaining value, kept in xmm6 while running
            ;   [r1 + 16]  dword giving the key-schedule length in 32-byte units
            ;   [r1 + 32]  start of the round keys
            ; On exit the chaining value at the original [r1] is updated.
            ; Both data and state are accessed with movdqa, so they are assumed
            ; to be 16-byte aligned -- TODO confirm with the callers.
     92     MY_PROLOG xmm6
     93 
            ; x6 becomes the offset one key pair below the end of the schedule;
            ; the "+32" loads below then read the last key first.
     94     sub     x6, 32
     95 
     96     jmp     check2
     97 
            ; Wide path: decrypt ways blocks per iteration in xmm0 .. xmm3.
     98   align 16
     99   nextBlocks2:
    100     mov     x3, x6
    101     OP_W    movdqa, [rD + i * 16]
    102     LOAD_OP_W  pxor, +32
    103     DECODE  LOAD_OP_W
    104     OP_W    CBC_DEC_UPDATE, i * 16
    105     add     rD, ways16
    106   check2:
            ; No borrow means at least ways blocks were still pending.
    107     sub     rN, ways
    108     jnc     nextBlocks2
    109 
            ; Undo the last subtraction: rN is now the remainder (0 .. ways-1).
    110     add     rN, ways
    111     jmp     check
    112 
            ; Tail path: one block per iteration.
    113   nextBlock:
    114     mov     x3, x6
            ; xmm1 keeps the ciphertext block; it becomes the next chaining value.
    115     movdqa  xmm1, [rD]
    116     LOAD_OP movdqa, +32
    117     pxor    xmm0, xmm1
    118     DECODE  LOAD_OP
    119     pxor    xmm0, xmm6
    120     movdqa  [rD], xmm0
    121     movdqa  xmm6, xmm1
    122     add     rD, 16
    123   check:
    124     sub     rN, 1
    125     jnc     nextBlock
    126 
            ; r1 was advanced by 32 in MY_PROLOG, so r1 - 32 is the original
            ; location of the chaining value.
    127     movdqa  [r1 - 32], xmm6
    128     MY_EPILOG
    129 
    130 
    131 ; ---------- AES-CBC Encode ----------
    132 
    133 ENCODE macro op:req
        ; AES encryption rounds, walking the key schedule upwards.
        ;   op : LOAD_OP (one block in xmm0) or LOAD_OP_W (ways blocks).
        ; On entry r3 holds a negative byte offset from r1, which points at the
        ; last round key. The loop ends only when r3 reaches exactly 0, so r3 is
        ; expected to be a negative multiple of 32. On exit r3 is 0 and
        ; aesenclast has used the key at [r1].
    134     op      aesenc, -16
    135   @@:
    136     op      aesenc, +0
    137     op      aesenc, +16
            ; Step up by two round keys per iteration.
    138     add     r3, 32
    139     jnz     @B
    140     op      aesenclast, +0
    141 endm
    142 
    143 MY_PROC AesCbc_Encode_Intel, 3
            ; AES-CBC encryption, in place, of rN 16-byte blocks starting at rD.
            ; The state area at r1 has the same layout MY_PROLOG reads: 16-byte
            ; chaining value, a dword length at +16, round keys from +32.
            ; xmm0 holds the chaining value (the previous ciphertext block) across
            ; iterations and is written back to the state area on exit.
            ; Blocks are processed one at a time; there is no wide path here.
    144     MY_PROLOG xmm0
    145 
            ; Re-base: r1 points at the last round key and r6 becomes the
            ; negative start offset used by the upward-counting loop in ENCODE.
    146     add     r1, r6
    147     neg     r6
    148     add     r6, 32
    149 
    150     jmp     check_e
    151 
    152   align 16
    153   nextBlock_e:
    154     mov     r3, r6
            ; Chain in the plaintext, then apply the first round key
            ; (r1 + r3 - 32 is the start of the key schedule).
    155     pxor    xmm0, [rD]
    156     pxor    xmm0, [r1 + r3 - 32]
    157     ENCODE  LOAD_OP
    158     movdqa  [rD], xmm0
    159     add     rD, 16
    160   check_e:
    161     sub     rN, 1
    162     jnc     nextBlock_e
    163 
            ; r1 + r6 - 64 is the original r1, where the chaining value lives.
    164     movdqa  [r1 + r6 - 64], xmm0
    165     MY_EPILOG
    166 
    167 
    168 ; ---------- AES-CTR ----------
    169 
    170 XOR_UPD_1 macro reg, offs
        ; CTR step 1: xor the data at [rD + offs] into the keystream block in reg.
    171     pxor    reg, [rD + offs]
    172 endm
    173 
    174 XOR_UPD_2 macro reg, offs
        ; CTR step 2: store the result in reg back to [rD + offs].
    175     movdqa  [rD + offs], reg
    176 endm
    177 
    178 MY_PROC AesCtr_Code_Intel, 3
            ; AES-CTR, in place, over rN 16-byte blocks starting at rD.
            ; The state area at r1 has the layout MY_PROLOG reads: 16-byte counter
            ; block, a dword length at +16, round keys from +32.
            ; xmm6 holds the counter. It is incremented before each use by adding
            ; 1 to its low 64-bit lane (paddq); the high lane is never changed.
            ; The final counter is written back to the state area on exit.
            ;
            ; The increment constant is built in a register: movd from a 32-bit
            ; register zero-extends into the full XMM register, giving
            ; {1, 0, 0, 0}. No scratch memory below the stack pointer is used;
            ; that area is not allocated and is volatile under the x64 Windows ABI.
    179     MY_PROLOG xmm6
    180 
            ; Re-base: r1 points at the last round key and r6 becomes the
            ; negative start offset used by the upward-counting loop in ENCODE.
    190     add     r1, r6
    191     neg     r6
    192     add     r6, 32
    193 
    194     jmp     check2_c
    195 
            ; Wide path: ways counter blocks per iteration in xmm0 .. xmm3.
    196   align 16
    197   nextBlocks2_c:
            ; Rebuilt every iteration because LOAD_OP_W overwrites xmm7.
            ; x3 is free here: it is reloaded from r6 below.
            mov     x3, 1
            movd    xmm7, x3
    199 
    200     i = 0
    201     rept ways
    202     paddq   xmm6, xmm7
    203     movdqa  @CatStr(xmm,%i), xmm6
    204     i = i + 1
    205     endm
    206 
    207     mov     r3, r6
    208     LOAD_OP_W  pxor, -32
    209     ENCODE  LOAD_OP_W
    210     OP_W    XOR_UPD_1, i * 16
    211     OP_W    XOR_UPD_2, i * 16
    212     add     rD, ways16
    213   check2_c:
            ; No borrow means at least ways blocks were still pending.
    214     sub     rN, ways
    215     jnc     nextBlocks2_c
    216 
            ; Undo the last subtraction: rN is now the remainder (0 .. ways-1).
    217     add     rN, ways
            ; Increment constant for the tail loop. Nothing in that loop writes
            ; xmm7 (LOAD_OP only touches xmm0), so it is set up once.
            mov     x3, 1
            movd    xmm7, x3
    218     jmp     check_c
    219 
            ; Tail path: one block per iteration.
    220   nextBlock_c:
    221     paddq   xmm6, xmm7
    222     mov     r3, r6
            ; r1 + r3 - 32 is the start of the key schedule (first round key).
    223     movdqa  xmm0, [r1 + r3 - 32]
    224     pxor    xmm0, xmm6
    225     ENCODE  LOAD_OP
    226     XOR_UPD_1 xmm0, 0
    227     XOR_UPD_2 xmm0, 0
    228     add     rD, 16
    229   check_c:
    230     sub     rN, 1
    231     jnc     nextBlock_c
    232 
            ; r1 + r6 - 64 is the original r1, where the counter block lives.
    233     movdqa  [r1 + r6 - 64], xmm6
    234     MY_EPILOG
    236 
    237 end
    238