Home | History | Annotate | Download | only in poly1305
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // This code was translated into a form compatible with 5a from the public
      6 // domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
      7 
      8 // +build arm,!gccgo,!appengine
      9 
     10 DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff // five 32-bit AND masks, one per limb; word 0 keeps all of the low 26 bits
     11 DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03 // word 1: mask for limb 1
     12 DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff // word 2: mask for limb 2
     13 DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff // word 3: mask for limb 3
     14 DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff // word 4: mask for limb 4 (low 20 bits); the cleared bits presumably implement the Poly1305 clamp of r - confirm
     15 GLOBL poly1305_init_constants_armv6<>(SB), 8, $20 // file-local (<>) symbol, 20 bytes; flag 8 is presumably RODATA - confirm against textflag.h
     16 
     17 // Warning: the linker may use R11 to synthesize certain instructions. Please
     18 // take care and verify that no synthetic instructions use it.
     19 
     20 TEXT poly1305_init_ext_armv6<>(SB),4,$-4
          // poly1305_init_ext_armv6 fills in the state block at R0 from the key at R1.
          //   In:  R0 = state block (60 bytes are written),
          //        R1 = key (32 bytes are read).
          //   State layout produced (byte offsets from the incoming R0):
          //      0..19  the first 16 key bytes split into five limbs, each ANDed
          //             with the matching word of poly1305_init_constants_armv6
          //     20..39  five zero words
          //     40..55  the second 16 key bytes, copied unchanged
          //     56..59  one zero word
          //   R4-R11 are saved and restored; R0-R3 and R12 are overwritten.
          //   "g" is the assembler's name for R10.
          //   NOTE(review): flag 4 is presumably NOSPLIT and frame $-4 presumably
          //   means "no frame, LR not saved" - confirm against the toolchain docs.
     21   MOVM.DB.W [R4-R11], (R13) // push R4-R11
     22   MOVM.IA.W (R1), [R2-R5] // R2-R5 = first 16 key bytes; R1 += 16
     23   MOVW $poly1305_init_constants_armv6<>(SB), R7 // R7 = address of the mask table
     24   MOVW R2, R8 // limb 0 source: bits 0.. of the 128-bit value R5:R4:R3:R2
     25   MOVW R2>>26, R9
     26   MOVW R3>>20, g
     27   MOVW R4>>14, R11
     28   MOVW R5>>8, R12 // limb 4 source: bits 104..127
     29   ORR R3<<6, R9, R9 // limb 1 source: bits 26..
     30   ORR R4<<12, g, g // limb 2 source: bits 52..
     31   ORR R5<<18, R11, R11 // limb 3 source: bits 78..
     32   MOVM.IA (R7), [R2-R6] // R2-R6 = the five masks
     33   AND R8, R2, R2 // R2..R6 = limb i AND mask i (also trims each limb to its width)
     34   AND R9, R3, R3
     35   AND g, R4, R4
     36   AND R11, R5, R5
     37   AND R12, R6, R6
     38   MOVM.IA.W [R2-R6], (R0) // state[0..19] = masked limbs; R0 += 20
     39   EOR R2, R2, R2 // clear R2-R6
     40   EOR R3, R3, R3
     41   EOR R4, R4, R4
     42   EOR R5, R5, R5
     43   EOR R6, R6, R6
     44   MOVM.IA.W [R2-R6], (R0) // state[20..39] = 0; R0 += 20
     45   MOVM.IA.W (R1), [R2-R5] // R2-R5 = second 16 key bytes
     46   MOVM.IA [R2-R6], (R0) // state[40..55] = those bytes; state[56..59] = R6, which is still zero
     47   MOVM.IA.W (R13), [R4-R11] // pop R4-R11
     48   RET
     49 
     50 #define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
     51   MOVBU (offset+0)(Rsrc), Rtmp; \
     52   MOVBU Rtmp, (offset+0)(Rdst); \
     53   MOVBU (offset+1)(Rsrc), Rtmp; \
     54   MOVBU Rtmp, (offset+1)(Rdst); \
     55   MOVBU (offset+2)(Rsrc), Rtmp; \
     56   MOVBU Rtmp, (offset+2)(Rdst); \
     57   MOVBU (offset+3)(Rsrc), Rtmp; \
     58   MOVBU Rtmp, (offset+3)(Rdst) // MOVW_UNALIGNED: copy the 4 bytes at offset(Rsrc) to offset(Rdst) one byte at a time; Rsrc and Rdst are left unchanged, Rtmp is overwritten
     59 
     60 TEXT poly1305_blocks_armv6<>(SB),4,$-4
          // poly1305_blocks_armv6 absorbs whole 16-byte blocks into the accumulator.
          //   In:  R0 = state block, R1 = message pointer, R2 = byte count.
          //   One loop iteration consumes 16 bytes; the loop is entered only if at
          //   least 16 bytes are available and repeats while a further whole block
          //   remains. A tail shorter than 16 bytes is not read.
          //   State words used: 0..19 multiplier limbs r0..r4 (read),
          //   20..39 accumulator limbs h0..h4 (read, rewritten on exit),
          //   56 flag word (read).
          //   Scratch layout (offsets from R13 after the SUB below):
          //      0..31  64-bit partial sums d1, d2, d3, d4 (low word, high word)
          //     32      value ORed into the top limb of every block
          //     36      saved state pointer
          //     40      message cursor
          //     44      bytes remaining
          //     48..63  bounce buffer for input that is not word aligned
          //     64..83  copy of r0..r4
          //   "g" is the assembler's name for R10. All nine pushed registers are
          //   restored before RET; R0-R3 and R12 are overwritten.
     61   MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) // push nine registers
     62   SUB $128, R13 // reserve the 128-byte scratch area
     63   MOVW R0, 36(R13) // save state pointer
     64   MOVW R1, 40(R13) // save message pointer
     65   MOVW R2, 44(R13) // save byte count
     66   MOVW R1, R14 // R14 = message cursor
     67   MOVW R2, R12 // R12 = bytes remaining
     68   MOVW 56(R0), R8 // R8 = flag word at state+56
     69   WORD $0xe1180008 // TST R8, R8 not working see issue 5921
     70   EOR R6, R6, R6 // R6 = 0; no .S suffix, so the flags from the TST are kept
     71   MOVW.EQ $(1<<24), R6 // flag word == 0: R6 = 1<<24, i.e. bit 128 of the block (top limb starts at bit 104)
     72   MOVW R6, 32(R13) // remember it for every block
     73   ADD $64, R13, g // g = &scratch[64]
     74   MOVM.IA (R0), [R0-R9] // R0-R4 = r0..r4, R5-R9 = h0..h4
     75   MOVM.IA [R0-R4], (g) // scratch[64..83] = r0..r4
     76   CMP $16, R12
     77   BLO poly1305_blocks_armv6_done // fewer than 16 bytes: nothing to absorb
     78 poly1305_blocks_armv6_mainloop:
     79   WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
     80   BEQ poly1305_blocks_armv6_mainloop_aligned // cursor is word aligned: load the block directly
     81   ADD $48, R13, g // g = &scratch[48]; the four macros below copy the 16 input bytes there byte by byte
     82   MOVW_UNALIGNED(R14, g, R0, 0)
     83   MOVW_UNALIGNED(R14, g, R0, 4)
     84   MOVW_UNALIGNED(R14, g, R0, 8)
     85   MOVW_UNALIGNED(R14, g, R0, 12)
     86   MOVM.IA (g), [R0-R3] // R0-R3 = the block, read from the bounce buffer
     87   ADD $16, R14 // advance the cursor past this block
     88   B poly1305_blocks_armv6_mainloop_loaded
     89 poly1305_blocks_armv6_mainloop_aligned:
     90   MOVM.IA.W (R14), [R0-R3] // R0-R3 = the block; R14 += 16
     91 poly1305_blocks_armv6_mainloop_loaded:
          // Split the 128-bit block R3:R2:R1:R0 into five 26-bit limbs m0..m4
          // and add them to h0..h4 (R5..R9).
     92   MOVW R0>>26, g
     93   MOVW R1>>20, R11
     94   MOVW R2>>14, R12
     95   MOVW R14, 40(R13) // save the advanced cursor; R14 is reused below
     96   MOVW R3>>8, R4 // m4 = block bits 104..127
     97   ORR R1<<6, g, g // m1 before masking
     98   ORR R2<<12, R11, R11 // m2 before masking
     99   ORR R3<<18, R12, R12 // m3 before masking
    100   BIC $0xfc000000, R0, R0 // m0 = low 26 bits
    101   BIC $0xfc000000, g, g // m1
    102   MOVW 32(R13), R3 // R3 = 0 or 1<<24, chosen before the loop
    103   BIC $0xfc000000, R11, R11 // m2
    104   BIC $0xfc000000, R12, R12 // m3
    105   ADD R0, R5, R5 // h0 += m0
    106   ADD g, R6, R6 // h1 += m1
    107   ORR R3, R4, R4 // m4 |= saved top-limb value
    108   ADD R11, R7, R7 // h2 += m2
    109   ADD $64, R13, R14 // R14 = &scratch[64]
    110   ADD R12, R8, R8 // h3 += m3
    111   ADD R4, R9, R9 // h4 += m4
    112   MOVM.IA (R14), [R0-R4] // R0-R4 = r0..r4
          // Schoolbook multiply h*r into five 64-bit sums d0..d4. Register pairs
          // are written (high, low). Terms that wrap past limb 4 use 5*r.
    113   MULLU R4, R5, (R11, g) // d4 = r4*h0
    114   MULLU R3, R5, (R14, R12) // d3 = r3*h0
    115   MULALU R3, R6, (R11, g) // d4 += r3*h1
    116   MULALU R2, R6, (R14, R12) // d3 += r2*h1
    117   MULALU R2, R7, (R11, g) // d4 += r2*h2
    118   MULALU R1, R7, (R14, R12) // d3 += r1*h2
    119   ADD R4<<2, R4, R4 // R4 = 5*r4
    120   ADD R3<<2, R3, R3 // R3 = 5*r3
    121   MULALU R1, R8, (R11, g) // d4 += r1*h3
    122   MULALU R0, R8, (R14, R12) // d3 += r0*h3
    123   MULALU R0, R9, (R11, g) // d4 += r0*h4
    124   MULALU R4, R9, (R14, R12) // d3 += 5*r4*h4
    125   MOVW g, 24(R13) // spill d4 (low word)
    126   MOVW R11, 28(R13) // spill d4 (high word)
    127   MOVW R12, 16(R13) // spill d3 (low word)
    128   MOVW R14, 20(R13) // spill d3 (high word)
    129   MULLU R2, R5, (R11, g) // d2 = r2*h0
    130   MULLU R1, R5, (R14, R12) // d1 = r1*h0
    131   MULALU R1, R6, (R11, g) // d2 += r1*h1
    132   MULALU R0, R6, (R14, R12) // d1 += r0*h1
    133   MULALU R0, R7, (R11, g) // d2 += r0*h2
    134   MULALU R4, R7, (R14, R12) // d1 += 5*r4*h2
    135   ADD R2<<2, R2, R2 // R2 = 5*r2
    136   ADD R1<<2, R1, R1 // R1 = 5*r1
    137   MULALU R4, R8, (R11, g) // d2 += 5*r4*h3
    138   MULALU R3, R8, (R14, R12) // d1 += 5*r3*h3
    139   MULALU R3, R9, (R11, g) // d2 += 5*r3*h4
    140   MULALU R2, R9, (R14, R12) // d1 += 5*r2*h4
    141   MOVW g, 8(R13) // spill d2 (low word)
    142   MOVW R11, 12(R13) // spill d2 (high word)
    143   MOVW R12, 0(R13) // spill d1 (low word)
    144   MOVW R14, w+4(SP) // spill d1 (high word); NOTE(review): the reload below expects this at 4(R13) - confirm the named-SP form resolves there
    145   MULLU R0, R5, (R11, g) // d0 = r0*h0
    146   MULALU R4, R6, (R11, g) // d0 += 5*r4*h1
    147   MULALU R3, R7, (R11, g) // d0 += 5*r3*h2
    148   MULALU R2, R8, (R11, g) // d0 += 5*r2*h3
    149   MULALU R1, R9, (R11, g) // d0 += 5*r1*h4
    150   MOVM.IA (R13), [R0-R7] // reload (low, high): R0,R1 = d1; R2,R3 = d2; R4,R5 = d3; R6,R7 = d4
          // Carry propagation: two chains run interleaved (d0 -> d1 -> d2 -> limb 3
          // and d3 -> d4 -> 5*carry -> limb 0 -> limb 1), producing h0..h4 in R5..R9.
    151   MOVW g>>26, R12
    152   MOVW R4>>26, R14
    153   ORR R11<<6, R12, R12 // R12 = d0 >> 26
    154   ORR R5<<6, R14, R14 // R14 = d3 >> 26
    155   BIC $0xfc000000, g, g // g = low 26 bits of d0
    156   BIC $0xfc000000, R4, R4 // R4 = low 26 bits of d3
    157   ADD.S R12, R0, R0 // d1 += carry from d0 (64-bit add with the ADC below)
    158   ADC $0, R1, R1
    159   ADD.S R14, R6, R6 // d4 += carry from d3 (64-bit add with the ADC below)
    160   ADC $0, R7, R7
    161   MOVW R0>>26, R12
    162   MOVW R6>>26, R14
    163   ORR R1<<6, R12, R12 // R12 = d1 >> 26
    164   ORR R7<<6, R14, R14 // R14 = d4 >> 26
    165   BIC $0xfc000000, R0, R0 // R0 = low 26 bits of d1
    166   BIC $0xfc000000, R6, R6 // R6 = low 26 bits of d4
    167   ADD R14<<2, R14, R14 // carry out of limb 4 is multiplied by 5 before wrapping to limb 0
    168   ADD.S R12, R2, R2 // d2 += carry from d1 (64-bit add with the ADC below)
    169   ADC $0, R3, R3
    170   ADD R14, g, g // limb 0 += 5 * carry from limb 4
    171   MOVW R2>>26, R12
    172   MOVW g>>26, R14 // R14 = carry out of the updated limb 0
    173   ORR R3<<6, R12, R12 // R12 = d2 >> 26
    174   BIC $0xfc000000, g, R5 // R5 = new h0
    175   BIC $0xfc000000, R2, R7 // R7 = new h2
    176   ADD R12, R4, R4 // limb 3 += carry from d2
    177   ADD R14, R0, R0 // limb 1 += carry from limb 0 (not masked again here)
    178   MOVW R4>>26, R12
    179   BIC $0xfc000000, R4, R8 // R8 = new h3
    180   ADD R12, R6, R9 // R9 = new h4 = limb 4 + carry from limb 3
    181   MOVW w+44(SP), R12 // R12 = bytes remaining; NOTE(review): expected to be the slot written as 44(R13) - confirm
    182   MOVW w+40(SP), R14 // R14 = message cursor; NOTE(review): expected to be the slot written as 40(R13) - confirm
    183   MOVW R0, R6 // R6 = new h1
    184   CMP $32, R12 // is there another whole block after the one just absorbed?
    185   SUB $16, R12, R12 // remaining -= 16; no .S suffix, so the CMP flags are kept
    186   MOVW R12, 44(R13) // store the updated remaining count
    187   BHS poly1305_blocks_armv6_mainloop // remaining was >= 32 before the subtraction: loop
    188 poly1305_blocks_armv6_done:
    189   MOVW 36(R13), R12 // R12 = state pointer saved on entry
    190   MOVW R5, 20(R12) // write h0..h4 back to state[20..39]
    191   MOVW R6, 24(R12)
    192   MOVW R7, 28(R12)
    193   MOVW R8, 32(R12)
    194   MOVW R9, 36(R12)
    195   ADD $128, R13, R13 // release the scratch area
    196   MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] // pop the saved registers
    197   RET
    198 
    199 #define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
    200   MOVBU.P 1(Rsrc), Rtmp; \
    201   MOVBU.P Rtmp, 1(Rdst); \
    202   MOVBU.P 1(Rsrc), Rtmp; \
    203   MOVBU.P Rtmp, 1(Rdst) // MOVHUP_UNALIGNED: copy 2 bytes from (Rsrc) to (Rdst) one byte at a time with post-increment, so both pointers end up advanced by 2; Rtmp is overwritten
    204 
    205 #define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
    206   MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
    207   MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) // MOVWP_UNALIGNED: two 2-byte copies, i.e. 4 bytes from (Rsrc) to (Rdst) with both pointers advanced by 4; Rtmp is overwritten
    208 
    209 TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
          // poly1305_finish_ext_armv6 absorbs a trailing partial block (if any),
          // fully reduces the accumulator, adds state[40..55] and writes the
          // 16-byte result, then zeroes the 64-byte state block.
          //   In:  R0 = state block, R1 = trailing message bytes,
          //        R2 = number of trailing bytes, R3 = output (16 bytes written).
          //   Only bits 3..0 of R2 choose what is copied, so R2 is expected to be
          //   below 16; the caller in this file passes the message length minus
          //   the bytes already absorbed as whole blocks.
          //   A 16-byte scratch block is reserved at R13 for the padded tail.
          //   "g" is the assembler's name for R10. All nine pushed registers are
          //   restored before RET; R0-R3 and R12 are overwritten.
    210   MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) // push nine registers
    211   SUB $16, R13, R13 // reserve the 16-byte scratch block
    212   MOVW R0, R5 // R5 = state pointer (survives the call below)
    213   MOVW R1, R6 // copy of the tail pointer; not read again in this function
    214   MOVW R2, R7 // copy of the tail length; not read again in this function
    215   MOVW R3, R8 // R8 = output pointer (survives the call below)
    216   AND.S R2, R2, R2 // set flags from the tail length
    217   BEQ poly1305_finish_ext_armv6_noremaining // no tail bytes: skip straight to the final reduction
    218   EOR R0, R0 // R0 = 0
    219   MOVW R13, R9 // R9 = write cursor into the scratch block
    220   MOVW R0, 0(R13) // zero all 16 scratch bytes
    221   MOVW R0, 4(R13)
    222   MOVW R0, 8(R13)
    223   MOVW R0, 12(R13)
          // Copy the tail into the scratch block. Bits 3, 2, 1 and 0 of R2 select
          // 8-, 4-, 2- and 1-byte pieces. The first ladder moves single bytes (source
          // not word aligned); the second uses word and halfword moves.
    224   WORD $0xe3110003 // TST R1, #3 not working see issue 5921
    225   BEQ poly1305_finish_ext_armv6_aligned
    226   WORD $0xe3120008 // TST R2, #8 not working see issue 5921
    227   BEQ poly1305_finish_ext_armv6_skip8
    228   MOVWP_UNALIGNED(R1, R9, g)
    229   MOVWP_UNALIGNED(R1, R9, g)
    230 poly1305_finish_ext_armv6_skip8:
    231   WORD $0xe3120004 // TST $4, R2 not working see issue 5921
    232   BEQ poly1305_finish_ext_armv6_skip4
    233   MOVWP_UNALIGNED(R1, R9, g)
    234 poly1305_finish_ext_armv6_skip4:
    235   WORD $0xe3120002 // TST $2, R2 not working see issue 5921
    236   BEQ poly1305_finish_ext_armv6_skip2
    237   MOVHUP_UNALIGNED(R1, R9, g)
    238   B poly1305_finish_ext_armv6_skip2 // the 1-byte step is shared by both ladders
    239 poly1305_finish_ext_armv6_aligned:
    240   WORD $0xe3120008 // TST R2, #8 not working see issue 5921
    241   BEQ poly1305_finish_ext_armv6_skip8_aligned
    242   MOVM.IA.W (R1), [g-R11] // copy 8 bytes via R10 and R11; R1 += 8
    243   MOVM.IA.W [g-R11], (R9) // R9 += 8
    244 poly1305_finish_ext_armv6_skip8_aligned:
    245   WORD $0xe3120004 // TST $4, R2 not working see issue 5921
    246   BEQ poly1305_finish_ext_armv6_skip4_aligned
    247   MOVW.P 4(R1), g // copy 4 bytes; both cursors advance by 4
    248   MOVW.P g, 4(R9)
    249 poly1305_finish_ext_armv6_skip4_aligned:
    250   WORD $0xe3120002 // TST $2, R2 not working see issue 5921
    251   BEQ poly1305_finish_ext_armv6_skip2
    252   MOVHU.P 2(R1), g // copy 2 bytes; both cursors advance by 2
    253   MOVH.P g, 2(R9)
    254 poly1305_finish_ext_armv6_skip2:
    255   WORD $0xe3120001 // TST $1, R2 not working see issue 5921
    256   BEQ poly1305_finish_ext_armv6_skip1
    257   MOVBU.P 1(R1), g // copy the last single byte
    258   MOVBU.P g, 1(R9)
    259 poly1305_finish_ext_armv6_skip1:
    260   MOVW $1, R11
    261   MOVBU R11, 0(R9) // append one byte of value 1 after the copied tail; the rest of the scratch block stays zero
    262   MOVW R11, 56(R5) // state flag word = 1, so poly1305_blocks_armv6 ORs 0 (not 1<<24) into the top limb
    263   MOVW R5, R0 // arguments for the call: R0 = state,
    264   MOVW R13, R1 // R1 = padded tail in the scratch block,
    265   MOVW $16, R2 // R2 = 16 bytes
    266   BL poly1305_blocks_armv6<>(SB) // absorb the padded tail as one block
    267 poly1305_finish_ext_armv6_noremaining:
    268   MOVW 20(R5), R0 // R0-R4 = h0..h4 from the state block
    269   MOVW 24(R5), R1
    270   MOVW 28(R5), R2
    271   MOVW 32(R5), R3
    272   MOVW 36(R5), R4
          // Full carry pass: the overflow of h4 is multiplied by 5 and wrapped into
          // h0, then carries ripple h0 -> h1 -> h2 -> h3 -> h4.
    273   MOVW R4>>26, R12
    274   BIC $0xfc000000, R4, R4
    275   ADD R12<<2, R12, R12 // R12 = 5 * (h4 >> 26)
    276   ADD R12, R0, R0
    277   MOVW R0>>26, R12
    278   BIC $0xfc000000, R0, R0
    279   ADD R12, R1, R1
    280   MOVW R1>>26, R12
    281   BIC $0xfc000000, R1, R1
    282   ADD R12, R2, R2
    283   MOVW R2>>26, R12
    284   BIC $0xfc000000, R2, R2
    285   ADD R12, R3, R3
    286   MOVW R3>>26, R12
    287   BIC $0xfc000000, R3, R3
    288   ADD R12, R4, R4
          // Compute h + 5 - 2^130 limb by limb into R6, R7, g, R11, R14.
    289   ADD $5, R0, R6
    290   MOVW R6>>26, R12
    291   BIC $0xfc000000, R6, R6
    292   ADD R12, R1, R7
    293   MOVW R7>>26, R12
    294   BIC $0xfc000000, R7, R7
    295   ADD R12, R2, g
    296   MOVW g>>26, R12
    297   BIC $0xfc000000, g, g
    298   ADD R12, R3, R11
    299   MOVW $-(1<<26), R12 // -2^26 in the top limb (which starts at bit 104) is -2^130 overall
    300   ADD R11>>26, R12, R12 // plus the carry out of limb 3
    301   BIC $0xfc000000, R11, R11
    302   ADD R12, R4, R14 // top limb of h + 5 - 2^130; negative when h + 5 < 2^130
          // Branch-free select between h and h + 5 - 2^130 using an all-zero or
          // all-one mask.
    303   MOVW R14>>31, R12 // R12 = 1 if the top limb is negative, else 0
    304   SUB $1, R12 // R12 = 0 (keep h) or 0xffffffff (take the reduced value)
    305   AND R12, R6, R6
    306   AND R12, R7, R7
    307   AND R12, g, g
    308   AND R12, R11, R11
    309   AND R12, R14, R14
    310   MVN R12, R12 // invert the mask for the h side
    311   AND R12, R0, R0
    312   AND R12, R1, R1
    313   AND R12, R2, R2
    314   AND R12, R3, R3
    315   AND R12, R4, R4
    316   ORR R6, R0, R0 // exactly one side of each OR is non-zero
    317   ORR R7, R1, R1
    318   ORR g, R2, R2
    319   ORR R11, R3, R3
    320   ORR R14, R4, R4
          // Repack the five 26-bit limbs into four 32-bit words R0-R3.
    321   ORR R1<<26, R0, R0
    322   MOVW R1>>6, R1
    323   ORR R2<<20, R1, R1
    324   MOVW R2>>12, R2
    325   ORR R3<<14, R2, R2
    326   MOVW R3>>18, R3
    327   ORR R4<<8, R3, R3
    328   MOVW 40(R5), R6 // R6, R7, g, R11 = state[40..55] (second half of the key, stored by init)
    329   MOVW 44(R5), R7
    330   MOVW 48(R5), g
    331   MOVW 52(R5), R11
    332   ADD.S R6, R0, R0 // 128-bit add with carry chained through the flags; the final carry out is discarded
    333   ADC.S R7, R1, R1
    334   ADC.S g, R2, R2
    335   ADC.S R11, R3, R3
    336   MOVM.IA [R0-R3], (R8) // write the 16-byte result to the output pointer
    337   MOVW R5, R12 // R12 = state pointer (R5 is about to be cleared)
    338   EOR R0, R0, R0 // clear R0-R7
    339   EOR R1, R1, R1
    340   EOR R2, R2, R2
    341   EOR R3, R3, R3
    342   EOR R4, R4, R4
    343   EOR R5, R5, R5
    344   EOR R6, R6, R6
    345   EOR R7, R7, R7
    346   MOVM.IA.W [R0-R7], (R12) // zero state[0..31]; R12 += 32
    347   MOVM.IA [R0-R7], (R12) // zero state[32..63]
    348   ADD $16, R13, R13 // release the scratch block
    349   MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] // pop the saved registers
    350   RET
    351 
    352 // func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
    353 TEXT ·poly1305_auth_armv6(SB),0,$280-16
          // poly1305_auth_armv6 is the Go-callable entry point: it writes the 16-byte
          // authenticator of the mlen bytes at m under key to out.
          // The leading middle dot on the symbol places it in the package namespace
          // so the Go declaration in the comment above links to this body.
          //   Arguments are read from the frame: out+0, m+4, mlen+8, key+12 (16 bytes).
          //   A 64-byte state block is set up on the stack and handed to the three
          //   file-local helpers: init (key), blocks (whole 16-byte blocks), finish
          //   (tail, final reduction, output, state wipe).
          //   R4-R8 carry values across the helper calls; every helper saves and
          //   restores at least R4-R9.
    354   MOVW  out+0(FP), R4 // R4 = out
    355   MOVW  m+4(FP), R5 // R5 = message cursor
    356   MOVW  mlen+8(FP), R6 // R6 = bytes remaining
    357   MOVW  key+12(FP), R7 // R7 = key
    358 
    359   MOVW R13, R8 // remember the stack pointer; restored before RET
    360   BIC $63, R13 // round R13 down to a multiple of 64
    361   SUB $64, R13, R13 // R13 now addresses the 64-byte state block
          // NOTE(review): the state block and everything the helpers push sit below
          // the adjusted R13 - confirm that the 280-byte frame declared on the TEXT
          // line is intended as head-room for that usage.
    362   MOVW  R13, R0 // poly1305_init_ext_armv6(state, key)
    363   MOVW  R7, R1
    364   BL poly1305_init_ext_armv6<>(SB)
    365   BIC.S $15, R6, R2 // R2 = mlen rounded down to a multiple of 16; sets flags
    366   BEQ poly1305_auth_armv6_noblocks // no whole block present
    367   MOVW R13, R0 // poly1305_blocks_armv6(state, m, R2)
    368   MOVW R5, R1
    369   ADD R2, R5, R5 // cursor now points at the tail
    370   SUB R2, R6, R6 // R6 = tail length (mlen & 15)
    371   BL poly1305_blocks_armv6<>(SB)
    372 poly1305_auth_armv6_noblocks:
    373   MOVW R13, R0 // poly1305_finish_ext_armv6(state, tail, tail length, out)
    374   MOVW R5, R1
    375   MOVW R6, R2
    376   MOVW R4, R3
    377   BL poly1305_finish_ext_armv6<>(SB)
    378   MOVW R8, R13 // restore the original stack pointer
    379   RET
    380