/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".

For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
     15 
     16 #define declareABCDE \
     17     UINT64 Aba, Abe, Abi, Abo, Abu; \
     18     UINT64 Aga, Age, Agi, Ago, Agu; \
     19     UINT64 Aka, Ake, Aki, Ako, Aku; \
     20     UINT64 Ama, Ame, Ami, Amo, Amu; \
     21     UINT64 Asa, Ase, Asi, Aso, Asu; \
     22     UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
     23     UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
     24     UINT64 Bka, Bke, Bki, Bko, Bku; \
     25     UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
     26     UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
     27     UINT64 Ca, Ce, Ci, Co, Cu; \
     28     UINT64 Da, De, Di, Do, Du; \
     29     UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
     30     UINT64 Ega, Ege, Egi, Ego, Egu; \
     31     UINT64 Eka, Eke, Eki, Eko, Eku; \
     32     UINT64 Ema, Eme, Emi, Emo, Emu; \
     33     UINT64 Esa, Ese, Esi, Eso, Esu; \
     34 
     35 #define prepareTheta \
     36     Ca = Aba^Aga^Aka^Ama^Asa; \
     37     Ce = Abe^Age^Ake^Ame^Ase; \
     38     Ci = Abi^Agi^Aki^Ami^Asi; \
     39     Co = Abo^Ago^Ako^Amo^Aso; \
     40     Cu = Abu^Agu^Aku^Amu^Asu; \
     41 
     42 #ifdef UseBebigokimisa
     43 /* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
     44 
     45 /* --- 64-bit lanes mapped to 64-bit words */
     46 
     47 #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
     48     Da = Cu^ROL64(Ce, 1); \
     49     De = Ca^ROL64(Ci, 1); \
     50     Di = Ce^ROL64(Co, 1); \
     51     Do = Ci^ROL64(Cu, 1); \
     52     Du = Co^ROL64(Ca, 1); \
     53 \
     54     A##ba ^= Da; \
     55     Bba = A##ba; \
     56     A##ge ^= De; \
     57     Bbe = ROL64(A##ge, 44); \
     58     A##ki ^= Di; \
     59     Bbi = ROL64(A##ki, 43); \
     60     A##mo ^= Do; \
     61     Bbo = ROL64(A##mo, 21); \
     62     A##su ^= Du; \
     63     Bbu = ROL64(A##su, 14); \
     64     E##ba =   Bba ^(  Bbe |  Bbi ); \
     65     E##ba ^= KeccakF1600RoundConstants[i]; \
     66     Ca = E##ba; \
     67     E##be =   Bbe ^((~Bbi)|  Bbo ); \
     68     Ce = E##be; \
     69     E##bi =   Bbi ^(  Bbo &  Bbu ); \
     70     Ci = E##bi; \
     71     E##bo =   Bbo ^(  Bbu |  Bba ); \
     72     Co = E##bo; \
     73     E##bu =   Bbu ^(  Bba &  Bbe ); \
     74     Cu = E##bu; \
     75 \
     76     A##bo ^= Do; \
     77     Bga = ROL64(A##bo, 28); \
     78     A##gu ^= Du; \
     79     Bge = ROL64(A##gu, 20); \
     80     A##ka ^= Da; \
     81     Bgi = ROL64(A##ka, 3); \
     82     A##me ^= De; \
     83     Bgo = ROL64(A##me, 45); \
     84     A##si ^= Di; \
     85     Bgu = ROL64(A##si, 61); \
     86     E##ga =   Bga ^(  Bge |  Bgi ); \
     87     Ca ^= E##ga; \
     88     E##ge =   Bge ^(  Bgi &  Bgo ); \
     89     Ce ^= E##ge; \
     90     E##gi =   Bgi ^(  Bgo |(~Bgu)); \
     91     Ci ^= E##gi; \
     92     E##go =   Bgo ^(  Bgu |  Bga ); \
     93     Co ^= E##go; \
     94     E##gu =   Bgu ^(  Bga &  Bge ); \
     95     Cu ^= E##gu; \
     96 \
     97     A##be ^= De; \
     98     Bka = ROL64(A##be, 1); \
     99     A##gi ^= Di; \
    100     Bke = ROL64(A##gi, 6); \
    101     A##ko ^= Do; \
    102     Bki = ROL64(A##ko, 25); \
    103     A##mu ^= Du; \
    104     Bko = ROL64(A##mu, 8); \
    105     A##sa ^= Da; \
    106     Bku = ROL64(A##sa, 18); \
    107     E##ka =   Bka ^(  Bke |  Bki ); \
    108     Ca ^= E##ka; \
    109     E##ke =   Bke ^(  Bki &  Bko ); \
    110     Ce ^= E##ke; \
    111     E##ki =   Bki ^((~Bko)&  Bku ); \
    112     Ci ^= E##ki; \
    113     E##ko = (~Bko)^(  Bku |  Bka ); \
    114     Co ^= E##ko; \
    115     E##ku =   Bku ^(  Bka &  Bke ); \
    116     Cu ^= E##ku; \
    117 \
    118     A##bu ^= Du; \
    119     Bma = ROL64(A##bu, 27); \
    120     A##ga ^= Da; \
    121     Bme = ROL64(A##ga, 36); \
    122     A##ke ^= De; \
    123     Bmi = ROL64(A##ke, 10); \
    124     A##mi ^= Di; \
    125     Bmo = ROL64(A##mi, 15); \
    126     A##so ^= Do; \
    127     Bmu = ROL64(A##so, 56); \
    128     E##ma =   Bma ^(  Bme &  Bmi ); \
    129     Ca ^= E##ma; \
    130     E##me =   Bme ^(  Bmi |  Bmo ); \
    131     Ce ^= E##me; \
    132     E##mi =   Bmi ^((~Bmo)|  Bmu ); \
    133     Ci ^= E##mi; \
    134     E##mo = (~Bmo)^(  Bmu &  Bma ); \
    135     Co ^= E##mo; \
    136     E##mu =   Bmu ^(  Bma |  Bme ); \
    137     Cu ^= E##mu; \
    138 \
    139     A##bi ^= Di; \
    140     Bsa = ROL64(A##bi, 62); \
    141     A##go ^= Do; \
    142     Bse = ROL64(A##go, 55); \
    143     A##ku ^= Du; \
    144     Bsi = ROL64(A##ku, 39); \
    145     A##ma ^= Da; \
    146     Bso = ROL64(A##ma, 41); \
    147     A##se ^= De; \
    148     Bsu = ROL64(A##se, 2); \
    149     E##sa =   Bsa ^((~Bse)&  Bsi ); \
    150     Ca ^= E##sa; \
    151     E##se = (~Bse)^(  Bsi |  Bso ); \
    152     Ce ^= E##se; \
    153     E##si =   Bsi ^(  Bso &  Bsu ); \
    154     Ci ^= E##si; \
    155     E##so =   Bso ^(  Bsu |  Bsa ); \
    156     Co ^= E##so; \
    157     E##su =   Bsu ^(  Bsa &  Bse ); \
    158     Cu ^= E##su; \
    159 \
    160 
    161 /* --- Code for round (lane complementing pattern 'bebigokimisa') */
    162 
    163 /* --- 64-bit lanes mapped to 64-bit words */
    164 
    165 #define thetaRhoPiChiIota(i, A, E) \
    166     Da = Cu^ROL64(Ce, 1); \
    167     De = Ca^ROL64(Ci, 1); \
    168     Di = Ce^ROL64(Co, 1); \
    169     Do = Ci^ROL64(Cu, 1); \
    170     Du = Co^ROL64(Ca, 1); \
    171 \
    172     A##ba ^= Da; \
    173     Bba = A##ba; \
    174     A##ge ^= De; \
    175     Bbe = ROL64(A##ge, 44); \
    176     A##ki ^= Di; \
    177     Bbi = ROL64(A##ki, 43); \
    178     A##mo ^= Do; \
    179     Bbo = ROL64(A##mo, 21); \
    180     A##su ^= Du; \
    181     Bbu = ROL64(A##su, 14); \
    182     E##ba =   Bba ^(  Bbe |  Bbi ); \
    183     E##ba ^= KeccakF1600RoundConstants[i]; \
    184     E##be =   Bbe ^((~Bbi)|  Bbo ); \
    185     E##bi =   Bbi ^(  Bbo &  Bbu ); \
    186     E##bo =   Bbo ^(  Bbu |  Bba ); \
    187     E##bu =   Bbu ^(  Bba &  Bbe ); \
    188 \
    189     A##bo ^= Do; \
    190     Bga = ROL64(A##bo, 28); \
    191     A##gu ^= Du; \
    192     Bge = ROL64(A##gu, 20); \
    193     A##ka ^= Da; \
    194     Bgi = ROL64(A##ka, 3); \
    195     A##me ^= De; \
    196     Bgo = ROL64(A##me, 45); \
    197     A##si ^= Di; \
    198     Bgu = ROL64(A##si, 61); \
    199     E##ga =   Bga ^(  Bge |  Bgi ); \
    200     E##ge =   Bge ^(  Bgi &  Bgo ); \
    201     E##gi =   Bgi ^(  Bgo |(~Bgu)); \
    202     E##go =   Bgo ^(  Bgu |  Bga ); \
    203     E##gu =   Bgu ^(  Bga &  Bge ); \
    204 \
    205     A##be ^= De; \
    206     Bka = ROL64(A##be, 1); \
    207     A##gi ^= Di; \
    208     Bke = ROL64(A##gi, 6); \
    209     A##ko ^= Do; \
    210     Bki = ROL64(A##ko, 25); \
    211     A##mu ^= Du; \
    212     Bko = ROL64(A##mu, 8); \
    213     A##sa ^= Da; \
    214     Bku = ROL64(A##sa, 18); \
    215     E##ka =   Bka ^(  Bke |  Bki ); \
    216     E##ke =   Bke ^(  Bki &  Bko ); \
    217     E##ki =   Bki ^((~Bko)&  Bku ); \
    218     E##ko = (~Bko)^(  Bku |  Bka ); \
    219     E##ku =   Bku ^(  Bka &  Bke ); \
    220 \
    221     A##bu ^= Du; \
    222     Bma = ROL64(A##bu, 27); \
    223     A##ga ^= Da; \
    224     Bme = ROL64(A##ga, 36); \
    225     A##ke ^= De; \
    226     Bmi = ROL64(A##ke, 10); \
    227     A##mi ^= Di; \
    228     Bmo = ROL64(A##mi, 15); \
    229     A##so ^= Do; \
    230     Bmu = ROL64(A##so, 56); \
    231     E##ma =   Bma ^(  Bme &  Bmi ); \
    232     E##me =   Bme ^(  Bmi |  Bmo ); \
    233     E##mi =   Bmi ^((~Bmo)|  Bmu ); \
    234     E##mo = (~Bmo)^(  Bmu &  Bma ); \
    235     E##mu =   Bmu ^(  Bma |  Bme ); \
    236 \
    237     A##bi ^= Di; \
    238     Bsa = ROL64(A##bi, 62); \
    239     A##go ^= Do; \
    240     Bse = ROL64(A##go, 55); \
    241     A##ku ^= Du; \
    242     Bsi = ROL64(A##ku, 39); \
    243     A##ma ^= Da; \
    244     Bso = ROL64(A##ma, 41); \
    245     A##se ^= De; \
    246     Bsu = ROL64(A##se, 2); \
    247     E##sa =   Bsa ^((~Bse)&  Bsi ); \
    248     E##se = (~Bse)^(  Bsi |  Bso ); \
    249     E##si =   Bsi ^(  Bso &  Bsu ); \
    250     E##so =   Bso ^(  Bsu |  Bsa ); \
    251     E##su =   Bsu ^(  Bsa &  Bse ); \
    252 \
    253 
    254 #else /* UseBebigokimisa */
    255 
    256 /* --- Code for round, with prepare-theta */
    257 
    258 /* --- 64-bit lanes mapped to 64-bit words */
    259 
    260 #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    261     Da = Cu^ROL64(Ce, 1); \
    262     De = Ca^ROL64(Ci, 1); \
    263     Di = Ce^ROL64(Co, 1); \
    264     Do = Ci^ROL64(Cu, 1); \
    265     Du = Co^ROL64(Ca, 1); \
    266 \
    267     A##ba ^= Da; \
    268     Bba = A##ba; \
    269     A##ge ^= De; \
    270     Bbe = ROL64(A##ge, 44); \
    271     A##ki ^= Di; \
    272     Bbi = ROL64(A##ki, 43); \
    273     A##mo ^= Do; \
    274     Bbo = ROL64(A##mo, 21); \
    275     A##su ^= Du; \
    276     Bbu = ROL64(A##su, 14); \
    277     E##ba =   Bba ^((~Bbe)&  Bbi ); \
    278     E##ba ^= KeccakF1600RoundConstants[i]; \
    279     Ca = E##ba; \
    280     E##be =   Bbe ^((~Bbi)&  Bbo ); \
    281     Ce = E##be; \
    282     E##bi =   Bbi ^((~Bbo)&  Bbu ); \
    283     Ci = E##bi; \
    284     E##bo =   Bbo ^((~Bbu)&  Bba ); \
    285     Co = E##bo; \
    286     E##bu =   Bbu ^((~Bba)&  Bbe ); \
    287     Cu = E##bu; \
    288 \
    289     A##bo ^= Do; \
    290     Bga = ROL64(A##bo, 28); \
    291     A##gu ^= Du; \
    292     Bge = ROL64(A##gu, 20); \
    293     A##ka ^= Da; \
    294     Bgi = ROL64(A##ka, 3); \
    295     A##me ^= De; \
    296     Bgo = ROL64(A##me, 45); \
    297     A##si ^= Di; \
    298     Bgu = ROL64(A##si, 61); \
    299     E##ga =   Bga ^((~Bge)&  Bgi ); \
    300     Ca ^= E##ga; \
    301     E##ge =   Bge ^((~Bgi)&  Bgo ); \
    302     Ce ^= E##ge; \
    303     E##gi =   Bgi ^((~Bgo)&  Bgu ); \
    304     Ci ^= E##gi; \
    305     E##go =   Bgo ^((~Bgu)&  Bga ); \
    306     Co ^= E##go; \
    307     E##gu =   Bgu ^((~Bga)&  Bge ); \
    308     Cu ^= E##gu; \
    309 \
    310     A##be ^= De; \
    311     Bka = ROL64(A##be, 1); \
    312     A##gi ^= Di; \
    313     Bke = ROL64(A##gi, 6); \
    314     A##ko ^= Do; \
    315     Bki = ROL64(A##ko, 25); \
    316     A##mu ^= Du; \
    317     Bko = ROL64(A##mu, 8); \
    318     A##sa ^= Da; \
    319     Bku = ROL64(A##sa, 18); \
    320     E##ka =   Bka ^((~Bke)&  Bki ); \
    321     Ca ^= E##ka; \
    322     E##ke =   Bke ^((~Bki)&  Bko ); \
    323     Ce ^= E##ke; \
    324     E##ki =   Bki ^((~Bko)&  Bku ); \
    325     Ci ^= E##ki; \
    326     E##ko =   Bko ^((~Bku)&  Bka ); \
    327     Co ^= E##ko; \
    328     E##ku =   Bku ^((~Bka)&  Bke ); \
    329     Cu ^= E##ku; \
    330 \
    331     A##bu ^= Du; \
    332     Bma = ROL64(A##bu, 27); \
    333     A##ga ^= Da; \
    334     Bme = ROL64(A##ga, 36); \
    335     A##ke ^= De; \
    336     Bmi = ROL64(A##ke, 10); \
    337     A##mi ^= Di; \
    338     Bmo = ROL64(A##mi, 15); \
    339     A##so ^= Do; \
    340     Bmu = ROL64(A##so, 56); \
    341     E##ma =   Bma ^((~Bme)&  Bmi ); \
    342     Ca ^= E##ma; \
    343     E##me =   Bme ^((~Bmi)&  Bmo ); \
    344     Ce ^= E##me; \
    345     E##mi =   Bmi ^((~Bmo)&  Bmu ); \
    346     Ci ^= E##mi; \
    347     E##mo =   Bmo ^((~Bmu)&  Bma ); \
    348     Co ^= E##mo; \
    349     E##mu =   Bmu ^((~Bma)&  Bme ); \
    350     Cu ^= E##mu; \
    351 \
    352     A##bi ^= Di; \
    353     Bsa = ROL64(A##bi, 62); \
    354     A##go ^= Do; \
    355     Bse = ROL64(A##go, 55); \
    356     A##ku ^= Du; \
    357     Bsi = ROL64(A##ku, 39); \
    358     A##ma ^= Da; \
    359     Bso = ROL64(A##ma, 41); \
    360     A##se ^= De; \
    361     Bsu = ROL64(A##se, 2); \
    362     E##sa =   Bsa ^((~Bse)&  Bsi ); \
    363     Ca ^= E##sa; \
    364     E##se =   Bse ^((~Bsi)&  Bso ); \
    365     Ce ^= E##se; \
    366     E##si =   Bsi ^((~Bso)&  Bsu ); \
    367     Ci ^= E##si; \
    368     E##so =   Bso ^((~Bsu)&  Bsa ); \
    369     Co ^= E##so; \
    370     E##su =   Bsu ^((~Bsa)&  Bse ); \
    371     Cu ^= E##su; \
    372 \
    373 
    374 /* --- Code for round */
    375 
    376 /* --- 64-bit lanes mapped to 64-bit words */
    377 
    378 #define thetaRhoPiChiIota(i, A, E) \
    379     Da = Cu^ROL64(Ce, 1); \
    380     De = Ca^ROL64(Ci, 1); \
    381     Di = Ce^ROL64(Co, 1); \
    382     Do = Ci^ROL64(Cu, 1); \
    383     Du = Co^ROL64(Ca, 1); \
    384 \
    385     A##ba ^= Da; \
    386     Bba = A##ba; \
    387     A##ge ^= De; \
    388     Bbe = ROL64(A##ge, 44); \
    389     A##ki ^= Di; \
    390     Bbi = ROL64(A##ki, 43); \
    391     A##mo ^= Do; \
    392     Bbo = ROL64(A##mo, 21); \
    393     A##su ^= Du; \
    394     Bbu = ROL64(A##su, 14); \
    395     E##ba =   Bba ^((~Bbe)&  Bbi ); \
    396     E##ba ^= KeccakF1600RoundConstants[i]; \
    397     E##be =   Bbe ^((~Bbi)&  Bbo ); \
    398     E##bi =   Bbi ^((~Bbo)&  Bbu ); \
    399     E##bo =   Bbo ^((~Bbu)&  Bba ); \
    400     E##bu =   Bbu ^((~Bba)&  Bbe ); \
    401 \
    402     A##bo ^= Do; \
    403     Bga = ROL64(A##bo, 28); \
    404     A##gu ^= Du; \
    405     Bge = ROL64(A##gu, 20); \
    406     A##ka ^= Da; \
    407     Bgi = ROL64(A##ka, 3); \
    408     A##me ^= De; \
    409     Bgo = ROL64(A##me, 45); \
    410     A##si ^= Di; \
    411     Bgu = ROL64(A##si, 61); \
    412     E##ga =   Bga ^((~Bge)&  Bgi ); \
    413     E##ge =   Bge ^((~Bgi)&  Bgo ); \
    414     E##gi =   Bgi ^((~Bgo)&  Bgu ); \
    415     E##go =   Bgo ^((~Bgu)&  Bga ); \
    416     E##gu =   Bgu ^((~Bga)&  Bge ); \
    417 \
    418     A##be ^= De; \
    419     Bka = ROL64(A##be, 1); \
    420     A##gi ^= Di; \
    421     Bke = ROL64(A##gi, 6); \
    422     A##ko ^= Do; \
    423     Bki = ROL64(A##ko, 25); \
    424     A##mu ^= Du; \
    425     Bko = ROL64(A##mu, 8); \
    426     A##sa ^= Da; \
    427     Bku = ROL64(A##sa, 18); \
    428     E##ka =   Bka ^((~Bke)&  Bki ); \
    429     E##ke =   Bke ^((~Bki)&  Bko ); \
    430     E##ki =   Bki ^((~Bko)&  Bku ); \
    431     E##ko =   Bko ^((~Bku)&  Bka ); \
    432     E##ku =   Bku ^((~Bka)&  Bke ); \
    433 \
    434     A##bu ^= Du; \
    435     Bma = ROL64(A##bu, 27); \
    436     A##ga ^= Da; \
    437     Bme = ROL64(A##ga, 36); \
    438     A##ke ^= De; \
    439     Bmi = ROL64(A##ke, 10); \
    440     A##mi ^= Di; \
    441     Bmo = ROL64(A##mi, 15); \
    442     A##so ^= Do; \
    443     Bmu = ROL64(A##so, 56); \
    444     E##ma =   Bma ^((~Bme)&  Bmi ); \
    445     E##me =   Bme ^((~Bmi)&  Bmo ); \
    446     E##mi =   Bmi ^((~Bmo)&  Bmu ); \
    447     E##mo =   Bmo ^((~Bmu)&  Bma ); \
    448     E##mu =   Bmu ^((~Bma)&  Bme ); \
    449 \
    450     A##bi ^= Di; \
    451     Bsa = ROL64(A##bi, 62); \
    452     A##go ^= Do; \
    453     Bse = ROL64(A##go, 55); \
    454     A##ku ^= Du; \
    455     Bsi = ROL64(A##ku, 39); \
    456     A##ma ^= Da; \
    457     Bso = ROL64(A##ma, 41); \
    458     A##se ^= De; \
    459     Bsu = ROL64(A##se, 2); \
    460     E##sa =   Bsa ^((~Bse)&  Bsi ); \
    461     E##se =   Bse ^((~Bsi)&  Bso ); \
    462     E##si =   Bsi ^((~Bso)&  Bsu ); \
    463     E##so =   Bso ^((~Bsu)&  Bsa ); \
    464     E##su =   Bsu ^((~Bsa)&  Bse ); \
    465 \
    466 
    467 #endif /* UseBebigokimisa */
    468 
    469 
    470 #define copyFromState(X, state) \
    471     X##ba = state[ 0]; \
    472     X##be = state[ 1]; \
    473     X##bi = state[ 2]; \
    474     X##bo = state[ 3]; \
    475     X##bu = state[ 4]; \
    476     X##ga = state[ 5]; \
    477     X##ge = state[ 6]; \
    478     X##gi = state[ 7]; \
    479     X##go = state[ 8]; \
    480     X##gu = state[ 9]; \
    481     X##ka = state[10]; \
    482     X##ke = state[11]; \
    483     X##ki = state[12]; \
    484     X##ko = state[13]; \
    485     X##ku = state[14]; \
    486     X##ma = state[15]; \
    487     X##me = state[16]; \
    488     X##mi = state[17]; \
    489     X##mo = state[18]; \
    490     X##mu = state[19]; \
    491     X##sa = state[20]; \
    492     X##se = state[21]; \
    493     X##si = state[22]; \
    494     X##so = state[23]; \
    495     X##su = state[24]; \
    496 
    497 #define copyToState(state, X) \
    498     state[ 0] = X##ba; \
    499     state[ 1] = X##be; \
    500     state[ 2] = X##bi; \
    501     state[ 3] = X##bo; \
    502     state[ 4] = X##bu; \
    503     state[ 5] = X##ga; \
    504     state[ 6] = X##ge; \
    505     state[ 7] = X##gi; \
    506     state[ 8] = X##go; \
    507     state[ 9] = X##gu; \
    508     state[10] = X##ka; \
    509     state[11] = X##ke; \
    510     state[12] = X##ki; \
    511     state[13] = X##ko; \
    512     state[14] = X##ku; \
    513     state[15] = X##ma; \
    514     state[16] = X##me; \
    515     state[17] = X##mi; \
    516     state[18] = X##mo; \
    517     state[19] = X##mu; \
    518     state[20] = X##sa; \
    519     state[21] = X##se; \
    520     state[22] = X##si; \
    521     state[23] = X##so; \
    522     state[24] = X##su; \
    523 
    524 #define copyStateVariables(X, Y) \
    525     X##ba = Y##ba; \
    526     X##be = Y##be; \
    527     X##bi = Y##bi; \
    528     X##bo = Y##bo; \
    529     X##bu = Y##bu; \
    530     X##ga = Y##ga; \
    531     X##ge = Y##ge; \
    532     X##gi = Y##gi; \
    533     X##go = Y##go; \
    534     X##gu = Y##gu; \
    535     X##ka = Y##ka; \
    536     X##ke = Y##ke; \
    537     X##ki = Y##ki; \
    538     X##ko = Y##ko; \
    539     X##ku = Y##ku; \
    540     X##ma = Y##ma; \
    541     X##me = Y##me; \
    542     X##mi = Y##mi; \
    543     X##mo = Y##mo; \
    544     X##mu = Y##mu; \
    545     X##sa = Y##sa; \
    546     X##se = Y##se; \
    547     X##si = Y##si; \
    548     X##so = Y##so; \
    549     X##su = Y##su; \
    550 
    551 #define copyFromStateAndAdd(X, state, input, laneCount) \
    552     if (laneCount < 16) { \
    553         if (laneCount < 8) { \
    554             if (laneCount < 4) { \
    555                 if (laneCount < 2) { \
    556                     if (laneCount < 1) { \
    557                         X##ba = state[ 0]; \
    558                     } \
    559                     else { \
    560                         X##ba = state[ 0]^input[ 0]; \
    561                     } \
    562                     X##be = state[ 1]; \
    563                     X##bi = state[ 2]; \
    564                 } \
    565                 else { \
    566                     X##ba = state[ 0]^input[ 0]; \
    567                     X##be = state[ 1]^input[ 1]; \
    568                     if (laneCount < 3) { \
    569                         X##bi = state[ 2]; \
    570                     } \
    571                     else { \
    572                         X##bi = state[ 2]^input[ 2]; \
    573                     } \
    574                 } \
    575                 X##bo = state[ 3]; \
    576                 X##bu = state[ 4]; \
    577                 X##ga = state[ 5]; \
    578                 X##ge = state[ 6]; \
    579             } \
    580             else { \
    581                 X##ba = state[ 0]^input[ 0]; \
    582                 X##be = state[ 1]^input[ 1]; \
    583                 X##bi = state[ 2]^input[ 2]; \
    584                 X##bo = state[ 3]^input[ 3]; \
    585                 if (laneCount < 6) { \
    586                     if (laneCount < 5) { \
    587                         X##bu = state[ 4]; \
    588                     } \
    589                     else { \
    590                         X##bu = state[ 4]^input[ 4]; \
    591                     } \
    592                     X##ga = state[ 5]; \
    593                     X##ge = state[ 6]; \
    594                 } \
    595                 else { \
    596                     X##bu = state[ 4]^input[ 4]; \
    597                     X##ga = state[ 5]^input[ 5]; \
    598                     if (laneCount < 7) { \
    599                         X##ge = state[ 6]; \
    600                     } \
    601                     else { \
    602                         X##ge = state[ 6]^input[ 6]; \
    603                     } \
    604                 } \
    605             } \
    606             X##gi = state[ 7]; \
    607             X##go = state[ 8]; \
    608             X##gu = state[ 9]; \
    609             X##ka = state[10]; \
    610             X##ke = state[11]; \
    611             X##ki = state[12]; \
    612             X##ko = state[13]; \
    613             X##ku = state[14]; \
    614         } \
    615         else { \
    616             X##ba = state[ 0]^input[ 0]; \
    617             X##be = state[ 1]^input[ 1]; \
    618             X##bi = state[ 2]^input[ 2]; \
    619             X##bo = state[ 3]^input[ 3]; \
    620             X##bu = state[ 4]^input[ 4]; \
    621             X##ga = state[ 5]^input[ 5]; \
    622             X##ge = state[ 6]^input[ 6]; \
    623             X##gi = state[ 7]^input[ 7]; \
    624             if (laneCount < 12) { \
    625                 if (laneCount < 10) { \
    626                     if (laneCount < 9) { \
    627                         X##go = state[ 8]; \
    628                     } \
    629                     else { \
    630                         X##go = state[ 8]^input[ 8]; \
    631                     } \
    632                     X##gu = state[ 9]; \
    633                     X##ka = state[10]; \
    634                 } \
    635                 else { \
    636                     X##go = state[ 8]^input[ 8]; \
    637                     X##gu = state[ 9]^input[ 9]; \
    638                     if (laneCount < 11) { \
    639                         X##ka = state[10]; \
    640                     } \
    641                     else { \
    642                         X##ka = state[10]^input[10]; \
    643                     } \
    644                 } \
    645                 X##ke = state[11]; \
    646                 X##ki = state[12]; \
    647                 X##ko = state[13]; \
    648                 X##ku = state[14]; \
    649             } \
    650             else { \
    651                 X##go = state[ 8]^input[ 8]; \
    652                 X##gu = state[ 9]^input[ 9]; \
    653                 X##ka = state[10]^input[10]; \
    654                 X##ke = state[11]^input[11]; \
    655                 if (laneCount < 14) { \
    656                     if (laneCount < 13) { \
    657                         X##ki = state[12]; \
    658                     } \
    659                     else { \
    660                         X##ki = state[12]^input[12]; \
    661                     } \
    662                     X##ko = state[13]; \
    663                     X##ku = state[14]; \
    664                 } \
    665                 else { \
    666                     X##ki = state[12]^input[12]; \
    667                     X##ko = state[13]^input[13]; \
    668                     if (laneCount < 15) { \
    669                         X##ku = state[14]; \
    670                     } \
    671                     else { \
    672                         X##ku = state[14]^input[14]; \
    673                     } \
    674                 } \
    675             } \
    676         } \
    677         X##ma = state[15]; \
    678         X##me = state[16]; \
    679         X##mi = state[17]; \
    680         X##mo = state[18]; \
    681         X##mu = state[19]; \
    682         X##sa = state[20]; \
    683         X##se = state[21]; \
    684         X##si = state[22]; \
    685         X##so = state[23]; \
    686         X##su = state[24]; \
    687     } \
    688     else { \
    689         X##ba = state[ 0]^input[ 0]; \
    690         X##be = state[ 1]^input[ 1]; \
    691         X##bi = state[ 2]^input[ 2]; \
    692         X##bo = state[ 3]^input[ 3]; \
    693         X##bu = state[ 4]^input[ 4]; \
    694         X##ga = state[ 5]^input[ 5]; \
    695         X##ge = state[ 6]^input[ 6]; \
    696         X##gi = state[ 7]^input[ 7]; \
    697         X##go = state[ 8]^input[ 8]; \
    698         X##gu = state[ 9]^input[ 9]; \
    699         X##ka = state[10]^input[10]; \
    700         X##ke = state[11]^input[11]; \
    701         X##ki = state[12]^input[12]; \
    702         X##ko = state[13]^input[13]; \
    703         X##ku = state[14]^input[14]; \
    704         X##ma = state[15]^input[15]; \
    705         if (laneCount < 24) { \
    706             if (laneCount < 20) { \
    707                 if (laneCount < 18) { \
    708                     if (laneCount < 17) { \
    709                         X##me = state[16]; \
    710                     } \
    711                     else { \
    712                         X##me = state[16]^input[16]; \
    713                     } \
    714                     X##mi = state[17]; \
    715                     X##mo = state[18]; \
    716                 } \
    717                 else { \
    718                     X##me = state[16]^input[16]; \
    719                     X##mi = state[17]^input[17]; \
    720                     if (laneCount < 19) { \
    721                         X##mo = state[18]; \
    722                     } \
    723                     else { \
    724                         X##mo = state[18]^input[18]; \
    725                     } \
    726                 } \
    727                 X##mu = state[19]; \
    728                 X##sa = state[20]; \
    729                 X##se = state[21]; \
    730                 X##si = state[22]; \
    731             } \
    732             else { \
    733                 X##me = state[16]^input[16]; \
    734                 X##mi = state[17]^input[17]; \
    735                 X##mo = state[18]^input[18]; \
    736                 X##mu = state[19]^input[19]; \
    737                 if (laneCount < 22) { \
    738                     if (laneCount < 21) { \
    739                         X##sa = state[20]; \
    740                     } \
    741                     else { \
    742                         X##sa = state[20]^input[20]; \
    743                     } \
    744                     X##se = state[21]; \
    745                     X##si = state[22]; \
    746                 } \
    747                 else { \
    748                     X##sa = state[20]^input[20]; \
    749                     X##se = state[21]^input[21]; \
    750                     if (laneCount < 23) { \
    751                         X##si = state[22]; \
    752                     } \
    753                     else { \
    754                         X##si = state[22]^input[22]; \
    755                     } \
    756                 } \
    757             } \
    758             X##so = state[23]; \
    759             X##su = state[24]; \
    760         } \
    761         else { \
    762             X##me = state[16]^input[16]; \
    763             X##mi = state[17]^input[17]; \
    764             X##mo = state[18]^input[18]; \
    765             X##mu = state[19]^input[19]; \
    766             X##sa = state[20]^input[20]; \
    767             X##se = state[21]^input[21]; \
    768             X##si = state[22]^input[22]; \
    769             X##so = state[23]^input[23]; \
    770             if (laneCount < 25) { \
    771                 X##su = state[24]; \
    772             } \
    773             else { \
    774                 X##su = state[24]^input[24]; \
    775             } \
    776         } \
    777     }
    778 
    779 #define addInput(X, input, laneCount) \
    780     if (laneCount == 21) { \
    781         X##ba ^= input[ 0]; \
    782         X##be ^= input[ 1]; \
    783         X##bi ^= input[ 2]; \
    784         X##bo ^= input[ 3]; \
    785         X##bu ^= input[ 4]; \
    786         X##ga ^= input[ 5]; \
    787         X##ge ^= input[ 6]; \
    788         X##gi ^= input[ 7]; \
    789         X##go ^= input[ 8]; \
    790         X##gu ^= input[ 9]; \
    791         X##ka ^= input[10]; \
    792         X##ke ^= input[11]; \
    793         X##ki ^= input[12]; \
    794         X##ko ^= input[13]; \
    795         X##ku ^= input[14]; \
    796         X##ma ^= input[15]; \
    797         X##me ^= input[16]; \
    798         X##mi ^= input[17]; \
    799         X##mo ^= input[18]; \
    800         X##mu ^= input[19]; \
    801         X##sa ^= input[20]; \
    802     } \
    803     else if (laneCount < 16) { \
    804         if (laneCount < 8) { \
    805             if (laneCount < 4) { \
    806                 if (laneCount < 2) { \
    807                     if (laneCount < 1) { \
    808                     } \
    809                     else { \
    810                         X##ba ^= input[ 0]; \
    811                     } \
    812                 } \
    813                 else { \
    814                     X##ba ^= input[ 0]; \
    815                     X##be ^= input[ 1]; \
    816                     if (laneCount < 3) { \
    817                     } \
    818                     else { \
    819                         X##bi ^= input[ 2]; \
    820                     } \
    821                 } \
    822             } \
    823             else { \
    824                 X##ba ^= input[ 0]; \
    825                 X##be ^= input[ 1]; \
    826                 X##bi ^= input[ 2]; \
    827                 X##bo ^= input[ 3]; \
    828                 if (laneCount < 6) { \
    829                     if (laneCount < 5) { \
    830                     } \
    831                     else { \
    832                         X##bu ^= input[ 4]; \
    833                     } \
    834                 } \
    835                 else { \
    836                     X##bu ^= input[ 4]; \
    837                     X##ga ^= input[ 5]; \
    838                     if (laneCount < 7) { \
    839                     } \
    840                     else { \
    841                         X##ge ^= input[ 6]; \
    842                     } \
    843                 } \
    844             } \
    845         } \
    846         else { \
    847             X##ba ^= input[ 0]; \
    848             X##be ^= input[ 1]; \
    849             X##bi ^= input[ 2]; \
    850             X##bo ^= input[ 3]; \
    851             X##bu ^= input[ 4]; \
    852             X##ga ^= input[ 5]; \
    853             X##ge ^= input[ 6]; \
    854             X##gi ^= input[ 7]; \
    855             if (laneCount < 12) { \
    856                 if (laneCount < 10) { \
    857                     if (laneCount < 9) { \
    858                     } \
    859                     else { \
    860                         X##go ^= input[ 8]; \
    861                     } \
    862                 } \
    863                 else { \
    864                     X##go ^= input[ 8]; \
    865                     X##gu ^= input[ 9]; \
    866                     if (laneCount < 11) { \
    867                     } \
    868                     else { \
    869                         X##ka ^= input[10]; \
    870                     } \
    871                 } \
    872             } \
    873             else { \
    874                 X##go ^= input[ 8]; \
    875                 X##gu ^= input[ 9]; \
    876                 X##ka ^= input[10]; \
    877                 X##ke ^= input[11]; \
    878                 if (laneCount < 14) { \
    879                     if (laneCount < 13) { \
    880                     } \
    881                     else { \
    882                         X##ki ^= input[12]; \
    883                     } \
    884                 } \
    885                 else { \
    886                     X##ki ^= input[12]; \
    887                     X##ko ^= input[13]; \
    888                     if (laneCount < 15) { \
    889                     } \
    890                     else { \
    891                         X##ku ^= input[14]; \
    892                     } \
    893                 } \
    894             } \
    895         } \
    896     } \
    897     else { \
    898         X##ba ^= input[ 0]; \
    899         X##be ^= input[ 1]; \
    900         X##bi ^= input[ 2]; \
    901         X##bo ^= input[ 3]; \
    902         X##bu ^= input[ 4]; \
    903         X##ga ^= input[ 5]; \
    904         X##ge ^= input[ 6]; \
    905         X##gi ^= input[ 7]; \
    906         X##go ^= input[ 8]; \
    907         X##gu ^= input[ 9]; \
    908         X##ka ^= input[10]; \
    909         X##ke ^= input[11]; \
    910         X##ki ^= input[12]; \
    911         X##ko ^= input[13]; \
    912         X##ku ^= input[14]; \
    913         X##ma ^= input[15]; \
    914         if (laneCount < 24) { \
    915             if (laneCount < 20) { \
    916                 if (laneCount < 18) { \
    917                     if (laneCount < 17) { \
    918                     } \
    919                     else { \
    920                         X##me ^= input[16]; \
    921                     } \
    922                 } \
    923                 else { \
    924                     X##me ^= input[16]; \
    925                     X##mi ^= input[17]; \
    926                     if (laneCount < 19) { \
    927                     } \
    928                     else { \
    929                         X##mo ^= input[18]; \
    930                     } \
    931                 } \
    932             } \
    933             else { \
    934                 X##me ^= input[16]; \
    935                 X##mi ^= input[17]; \
    936                 X##mo ^= input[18]; \
    937                 X##mu ^= input[19]; \
    938                 if (laneCount < 22) { \
    939                     if (laneCount < 21) { \
    940                     } \
    941                     else { \
    942                         X##sa ^= input[20]; \
    943                     } \
    944                 } \
    945                 else { \
    946                     X##sa ^= input[20]; \
    947                     X##se ^= input[21]; \
    948                     if (laneCount < 23) { \
    949                     } \
    950                     else { \
    951                         X##si ^= input[22]; \
    952                     } \
    953                 } \
    954             } \
    955         } \
    956         else { \
    957             X##me ^= input[16]; \
    958             X##mi ^= input[17]; \
    959             X##mo ^= input[18]; \
    960             X##mu ^= input[19]; \
    961             X##sa ^= input[20]; \
    962             X##se ^= input[21]; \
    963             X##si ^= input[22]; \
    964             X##so ^= input[23]; \
    965             if (laneCount < 25) { \
    966             } \
    967             else { \
    968                 X##su ^= input[24]; \
    969             } \
    970         } \
    971     }
    972 
    973 #ifdef UseBebigokimisa
    974 
    975 #define copyToStateAndOutput(X, state, output, laneCount) \
    976     if (laneCount < 16) { \
    977         if (laneCount < 8) { \
    978             if (laneCount < 4) { \
    979                 if (laneCount < 2) { \
    980                     state[ 0] = X##ba; \
    981                     if (laneCount >= 1) { \
    982                         output[ 0] = X##ba; \
    983                     } \
    984                     state[ 1] = X##be; \
    985                     state[ 2] = X##bi; \
    986                 } \
    987                 else { \
    988                     state[ 0] = X##ba; \
    989                     output[ 0] = X##ba; \
    990                     state[ 1] = X##be; \
    991                     output[ 1] = ~X##be; \
    992                     state[ 2] = X##bi; \
    993                     if (laneCount >= 3) { \
    994                         output[ 2] = ~X##bi; \
    995                     } \
    996                 } \
    997                 state[ 3] = X##bo; \
    998                 state[ 4] = X##bu; \
    999                 state[ 5] = X##ga; \
   1000                 state[ 6] = X##ge; \
   1001             } \
   1002             else { \
   1003                 state[ 0] = X##ba; \
   1004                 output[ 0] = X##ba; \
   1005                 state[ 1] = X##be; \
   1006                 output[ 1] = ~X##be; \
   1007                 state[ 2] = X##bi; \
   1008                 output[ 2] = ~X##bi; \
   1009                 state[ 3] = X##bo; \
   1010                 output[ 3] = X##bo; \
   1011                 if (laneCount < 6) { \
   1012                     state[ 4] = X##bu; \
   1013                     if (laneCount >= 5) { \
   1014                         output[ 4] = X##bu; \
   1015                     } \
   1016                     state[ 5] = X##ga; \
   1017                     state[ 6] = X##ge; \
   1018                 } \
   1019                 else { \
   1020                     state[ 4] = X##bu; \
   1021                     output[ 4] = X##bu; \
   1022                     state[ 5] = X##ga; \
   1023                     output[ 5] = X##ga; \
   1024                     state[ 6] = X##ge; \
   1025                     if (laneCount >= 7) { \
   1026                         output[ 6] = X##ge; \
   1027                     } \
   1028                 } \
   1029             } \
   1030             state[ 7] = X##gi; \
   1031             state[ 8] = X##go; \
   1032             state[ 9] = X##gu; \
   1033             state[10] = X##ka; \
   1034             state[11] = X##ke; \
   1035             state[12] = X##ki; \
   1036             state[13] = X##ko; \
   1037             state[14] = X##ku; \
   1038         } \
   1039         else { \
   1040             state[ 0] = X##ba; \
   1041             output[ 0] = X##ba; \
   1042             state[ 1] = X##be; \
   1043             output[ 1] = ~X##be; \
   1044             state[ 2] = X##bi; \
   1045             output[ 2] = ~X##bi; \
   1046             state[ 3] = X##bo; \
   1047             output[ 3] = X##bo; \
   1048             state[ 4] = X##bu; \
   1049             output[ 4] = X##bu; \
   1050             state[ 5] = X##ga; \
   1051             output[ 5] = X##ga; \
   1052             state[ 6] = X##ge; \
   1053             output[ 6] = X##ge; \
   1054             state[ 7] = X##gi; \
   1055             output[ 7] = X##gi; \
   1056             if (laneCount < 12) { \
   1057                 if (laneCount < 10) { \
   1058                     state[ 8] = X##go; \
   1059                     if (laneCount >= 9) { \
   1060                         output[ 8] = ~X##go; \
   1061                     } \
   1062                     state[ 9] = X##gu; \
   1063                     state[10] = X##ka; \
   1064                 } \
   1065                 else { \
   1066                     state[ 8] = X##go; \
   1067                     output[ 8] = ~X##go; \
   1068                     state[ 9] = X##gu; \
   1069                     output[ 9] = X##gu; \
   1070                     state[10] = X##ka; \
   1071                     if (laneCount >= 11) { \
   1072                         output[10] = X##ka; \
   1073                     } \
   1074                 } \
   1075                 state[11] = X##ke; \
   1076                 state[12] = X##ki; \
   1077                 state[13] = X##ko; \
   1078                 state[14] = X##ku; \
   1079             } \
   1080             else { \
   1081                 state[ 8] = X##go; \
   1082                 output[ 8] = ~X##go; \
   1083                 state[ 9] = X##gu; \
   1084                 output[ 9] = X##gu; \
   1085                 state[10] = X##ka; \
   1086                 output[10] = X##ka; \
   1087                 state[11] = X##ke; \
   1088                 output[11] = X##ke; \
   1089                 if (laneCount < 14) { \
   1090                     state[12] = X##ki; \
   1091                     if (laneCount >= 13) { \
   1092                         output[12] = ~X##ki; \
   1093                     } \
   1094                     state[13] = X##ko; \
   1095                     state[14] = X##ku; \
   1096                 } \
   1097                 else { \
   1098                     state[12] = X##ki; \
   1099                     output[12] = ~X##ki; \
   1100                     state[13] = X##ko; \
   1101                     output[13] = X##ko; \
   1102                     state[14] = X##ku; \
   1103                     if (laneCount >= 15) { \
   1104                         output[14] = X##ku; \
   1105                     } \
   1106                 } \
   1107             } \
   1108         } \
   1109         state[15] = X##ma; \
   1110         state[16] = X##me; \
   1111         state[17] = X##mi; \
   1112         state[18] = X##mo; \
   1113         state[19] = X##mu; \
   1114         state[20] = X##sa; \
   1115         state[21] = X##se; \
   1116         state[22] = X##si; \
   1117         state[23] = X##so; \
   1118         state[24] = X##su; \
   1119     } \
   1120     else { \
   1121         state[ 0] = X##ba; \
   1122         output[ 0] = X##ba; \
   1123         state[ 1] = X##be; \
   1124         output[ 1] = ~X##be; \
   1125         state[ 2] = X##bi; \
   1126         output[ 2] = ~X##bi; \
   1127         state[ 3] = X##bo; \
   1128         output[ 3] = X##bo; \
   1129         state[ 4] = X##bu; \
   1130         output[ 4] = X##bu; \
   1131         state[ 5] = X##ga; \
   1132         output[ 5] = X##ga; \
   1133         state[ 6] = X##ge; \
   1134         output[ 6] = X##ge; \
   1135         state[ 7] = X##gi; \
   1136         output[ 7] = X##gi; \
   1137         state[ 8] = X##go; \
   1138         output[ 8] = ~X##go; \
   1139         state[ 9] = X##gu; \
   1140         output[ 9] = X##gu; \
   1141         state[10] = X##ka; \
   1142         output[10] = X##ka; \
   1143         state[11] = X##ke; \
   1144         output[11] = X##ke; \
   1145         state[12] = X##ki; \
   1146         output[12] = ~X##ki; \
   1147         state[13] = X##ko; \
   1148         output[13] = X##ko; \
   1149         state[14] = X##ku; \
   1150         output[14] = X##ku; \
   1151         state[15] = X##ma; \
   1152         output[15] = X##ma; \
   1153         if (laneCount < 24) { \
   1154             if (laneCount < 20) { \
   1155                 if (laneCount < 18) { \
   1156                     state[16] = X##me; \
   1157                     if (laneCount >= 17) { \
   1158                         output[16] = X##me; \
   1159                     } \
   1160                     state[17] = X##mi; \
   1161                     state[18] = X##mo; \
   1162                 } \
   1163                 else { \
   1164                     state[16] = X##me; \
   1165                     output[16] = X##me; \
   1166                     state[17] = X##mi; \
   1167                     output[17] = ~X##mi; \
   1168                     state[18] = X##mo; \
   1169                     if (laneCount >= 19) { \
   1170                         output[18] = X##mo; \
   1171                     } \
   1172                 } \
   1173                 state[19] = X##mu; \
   1174                 state[20] = X##sa; \
   1175                 state[21] = X##se; \
   1176                 state[22] = X##si; \
   1177             } \
   1178             else { \
   1179                 state[16] = X##me; \
   1180                 output[16] = X##me; \
   1181                 state[17] = X##mi; \
   1182                 output[17] = ~X##mi; \
   1183                 state[18] = X##mo; \
   1184                 output[18] = X##mo; \
   1185                 state[19] = X##mu; \
   1186                 output[19] = X##mu; \
   1187                 if (laneCount < 22) { \
   1188                     state[20] = X##sa; \
   1189                     if (laneCount >= 21) { \
   1190                         output[20] = ~X##sa; \
   1191                     } \
   1192                     state[21] = X##se; \
   1193                     state[22] = X##si; \
   1194                 } \
   1195                 else { \
   1196                     state[20] = X##sa; \
   1197                     output[20] = ~X##sa; \
   1198                     state[21] = X##se; \
   1199                     output[21] = X##se; \
   1200                     state[22] = X##si; \
   1201                     if (laneCount >= 23) { \
   1202                         output[22] = X##si; \
   1203                     } \
   1204                 } \
   1205             } \
   1206             state[23] = X##so; \
   1207             state[24] = X##su; \
   1208         } \
   1209         else { \
   1210             state[16] = X##me; \
   1211             output[16] = X##me; \
   1212             state[17] = X##mi; \
   1213             output[17] = ~X##mi; \
   1214             state[18] = X##mo; \
   1215             output[18] = X##mo; \
   1216             state[19] = X##mu; \
   1217             output[19] = X##mu; \
   1218             state[20] = X##sa; \
   1219             output[20] = ~X##sa; \
   1220             state[21] = X##se; \
   1221             output[21] = X##se; \
   1222             state[22] = X##si; \
   1223             output[22] = X##si; \
   1224             state[23] = X##so; \
   1225             output[23] = X##so; \
   1226             state[24] = X##su; \
   1227             if (laneCount >= 25) { \
   1228                 output[24] = X##su; \
   1229             } \
   1230         } \
   1231     }
   1232 
   1233 #define output(X, output, laneCount) \
   1234     if (laneCount < 16) { \
   1235         if (laneCount < 8) { \
   1236             if (laneCount < 4) { \
   1237                 if (laneCount < 2) { \
   1238                     if (laneCount >= 1) { \
   1239                         output[ 0] = X##ba; \
   1240                     } \
   1241                 } \
   1242                 else { \
   1243                     output[ 0] = X##ba; \
   1244                     output[ 1] = ~X##be; \
   1245                     if (laneCount >= 3) { \
   1246                         output[ 2] = ~X##bi; \
   1247                     } \
   1248                 } \
   1249             } \
   1250             else { \
   1251                 output[ 0] = X##ba; \
   1252                 output[ 1] = ~X##be; \
   1253                 output[ 2] = ~X##bi; \
   1254                 output[ 3] = X##bo; \
   1255                 if (laneCount < 6) { \
   1256                     if (laneCount >= 5) { \
   1257                         output[ 4] = X##bu; \
   1258                     } \
   1259                 } \
   1260                 else { \
   1261                     output[ 4] = X##bu; \
   1262                     output[ 5] = X##ga; \
   1263                     if (laneCount >= 7) { \
   1264                         output[ 6] = X##ge; \
   1265                     } \
   1266                 } \
   1267             } \
   1268         } \
   1269         else { \
   1270             output[ 0] = X##ba; \
   1271             output[ 1] = ~X##be; \
   1272             output[ 2] = ~X##bi; \
   1273             output[ 3] = X##bo; \
   1274             output[ 4] = X##bu; \
   1275             output[ 5] = X##ga; \
   1276             output[ 6] = X##ge; \
   1277             output[ 7] = X##gi; \
   1278             if (laneCount < 12) { \
   1279                 if (laneCount < 10) { \
   1280                     if (laneCount >= 9) { \
   1281                         output[ 8] = ~X##go; \
   1282                     } \
   1283                 } \
   1284                 else { \
   1285                     output[ 8] = ~X##go; \
   1286                     output[ 9] = X##gu; \
   1287                     if (laneCount >= 11) { \
   1288                         output[10] = X##ka; \
   1289                     } \
   1290                 } \
   1291             } \
   1292             else { \
   1293                 output[ 8] = ~X##go; \
   1294                 output[ 9] = X##gu; \
   1295                 output[10] = X##ka; \
   1296                 output[11] = X##ke; \
   1297                 if (laneCount < 14) { \
   1298                     if (laneCount >= 13) { \
   1299                         output[12] = ~X##ki; \
   1300                     } \
   1301                 } \
   1302                 else { \
   1303                     output[12] = ~X##ki; \
   1304                     output[13] = X##ko; \
   1305                     if (laneCount >= 15) { \
   1306                         output[14] = X##ku; \
   1307                     } \
   1308                 } \
   1309             } \
   1310         } \
   1311     } \
   1312     else { \
   1313         output[ 0] = X##ba; \
   1314         output[ 1] = ~X##be; \
   1315         output[ 2] = ~X##bi; \
   1316         output[ 3] = X##bo; \
   1317         output[ 4] = X##bu; \
   1318         output[ 5] = X##ga; \
   1319         output[ 6] = X##ge; \
   1320         output[ 7] = X##gi; \
   1321         output[ 8] = ~X##go; \
   1322         output[ 9] = X##gu; \
   1323         output[10] = X##ka; \
   1324         output[11] = X##ke; \
   1325         output[12] = ~X##ki; \
   1326         output[13] = X##ko; \
   1327         output[14] = X##ku; \
   1328         output[15] = X##ma; \
   1329         if (laneCount < 24) { \
   1330             if (laneCount < 20) { \
   1331                 if (laneCount < 18) { \
   1332                     if (laneCount >= 17) { \
   1333                         output[16] = X##me; \
   1334                     } \
   1335                 } \
   1336                 else { \
   1337                     output[16] = X##me; \
   1338                     output[17] = ~X##mi; \
   1339                     if (laneCount >= 19) { \
   1340                         output[18] = X##mo; \
   1341                     } \
   1342                 } \
   1343             } \
   1344             else { \
   1345                 output[16] = X##me; \
   1346                 output[17] = ~X##mi; \
   1347                 output[18] = X##mo; \
   1348                 output[19] = X##mu; \
   1349                 if (laneCount < 22) { \
   1350                     if (laneCount >= 21) { \
   1351                         output[20] = ~X##sa; \
   1352                     } \
   1353                 } \
   1354                 else { \
   1355                     output[20] = ~X##sa; \
   1356                     output[21] = X##se; \
   1357                     if (laneCount >= 23) { \
   1358                         output[22] = X##si; \
   1359                     } \
   1360                 } \
   1361             } \
   1362         } \
   1363         else { \
   1364             output[16] = X##me; \
   1365             output[17] = ~X##mi; \
   1366             output[18] = X##mo; \
   1367             output[19] = X##mu; \
   1368             output[20] = ~X##sa; \
   1369             output[21] = X##se; \
   1370             output[22] = X##si; \
   1371             output[23] = X##so; \
   1372             if (laneCount >= 25) { \
   1373                 output[24] = X##su; \
   1374             } \
   1375         } \
   1376     }
   1377 
   1378 #define wrapOne(X, input, output, index, name) \
   1379     X##name ^= input[index]; \
   1380     output[index] = X##name;
   1381 
   1382 #define wrapOneInvert(X, input, output, index, name) \
   1383     X##name ^= input[index]; \
   1384     output[index] = ~X##name;
   1385 
   1386 #define unwrapOne(X, input, output, index, name) \
   1387     output[index] = input[index] ^ X##name; \
   1388     X##name ^= output[index];
   1389 
   1390 #define unwrapOneInvert(X, input, output, index, name) \
   1391     output[index] = ~(input[index] ^ X##name); \
   1392     X##name ^= output[index]; \
   1393 
   1394 #else /* UseBebigokimisa */
   1395 
   1396 
   1397 #define copyToStateAndOutput(X, state, output, laneCount) \
   1398     if (laneCount < 16) { \
   1399         if (laneCount < 8) { \
   1400             if (laneCount < 4) { \
   1401                 if (laneCount < 2) { \
   1402                     state[ 0] = X##ba; \
   1403                     if (laneCount >= 1) { \
   1404                         output[ 0] = X##ba; \
   1405                     } \
   1406                     state[ 1] = X##be; \
   1407                     state[ 2] = X##bi; \
   1408                 } \
   1409                 else { \
   1410                     state[ 0] = X##ba; \
   1411                     output[ 0] = X##ba; \
   1412                     state[ 1] = X##be; \
   1413                     output[ 1] = X##be; \
   1414                     state[ 2] = X##bi; \
   1415                     if (laneCount >= 3) { \
   1416                         output[ 2] = X##bi; \
   1417                     } \
   1418                 } \
   1419                 state[ 3] = X##bo; \
   1420                 state[ 4] = X##bu; \
   1421                 state[ 5] = X##ga; \
   1422                 state[ 6] = X##ge; \
   1423             } \
   1424             else { \
   1425                 state[ 0] = X##ba; \
   1426                 output[ 0] = X##ba; \
   1427                 state[ 1] = X##be; \
   1428                 output[ 1] = X##be; \
   1429                 state[ 2] = X##bi; \
   1430                 output[ 2] = X##bi; \
   1431                 state[ 3] = X##bo; \
   1432                 output[ 3] = X##bo; \
   1433                 if (laneCount < 6) { \
   1434                     state[ 4] = X##bu; \
   1435                     if (laneCount >= 5) { \
   1436                         output[ 4] = X##bu; \
   1437                     } \
   1438                     state[ 5] = X##ga; \
   1439                     state[ 6] = X##ge; \
   1440                 } \
   1441                 else { \
   1442                     state[ 4] = X##bu; \
   1443                     output[ 4] = X##bu; \
   1444                     state[ 5] = X##ga; \
   1445                     output[ 5] = X##ga; \
   1446                     state[ 6] = X##ge; \
   1447                     if (laneCount >= 7) { \
   1448                         output[ 6] = X##ge; \
   1449                     } \
   1450                 } \
   1451             } \
   1452             state[ 7] = X##gi; \
   1453             state[ 8] = X##go; \
   1454             state[ 9] = X##gu; \
   1455             state[10] = X##ka; \
   1456             state[11] = X##ke; \
   1457             state[12] = X##ki; \
   1458             state[13] = X##ko; \
   1459             state[14] = X##ku; \
   1460         } \
   1461         else { \
   1462             state[ 0] = X##ba; \
   1463             output[ 0] = X##ba; \
   1464             state[ 1] = X##be; \
   1465             output[ 1] = X##be; \
   1466             state[ 2] = X##bi; \
   1467             output[ 2] = X##bi; \
   1468             state[ 3] = X##bo; \
   1469             output[ 3] = X##bo; \
   1470             state[ 4] = X##bu; \
   1471             output[ 4] = X##bu; \
   1472             state[ 5] = X##ga; \
   1473             output[ 5] = X##ga; \
   1474             state[ 6] = X##ge; \
   1475             output[ 6] = X##ge; \
   1476             state[ 7] = X##gi; \
   1477             output[ 7] = X##gi; \
   1478             if (laneCount < 12) { \
   1479                 if (laneCount < 10) { \
   1480                     state[ 8] = X##go; \
   1481                     if (laneCount >= 9) { \
   1482                         output[ 8] = X##go; \
   1483                     } \
   1484                     state[ 9] = X##gu; \
   1485                     state[10] = X##ka; \
   1486                 } \
   1487                 else { \
   1488                     state[ 8] = X##go; \
   1489                     output[ 8] = X##go; \
   1490                     state[ 9] = X##gu; \
   1491                     output[ 9] = X##gu; \
   1492                     state[10] = X##ka; \
   1493                     if (laneCount >= 11) { \
   1494                         output[10] = X##ka; \
   1495                     } \
   1496                 } \
   1497                 state[11] = X##ke; \
   1498                 state[12] = X##ki; \
   1499                 state[13] = X##ko; \
   1500                 state[14] = X##ku; \
   1501             } \
   1502             else { \
   1503                 state[ 8] = X##go; \
   1504                 output[ 8] = X##go; \
   1505                 state[ 9] = X##gu; \
   1506                 output[ 9] = X##gu; \
   1507                 state[10] = X##ka; \
   1508                 output[10] = X##ka; \
   1509                 state[11] = X##ke; \
   1510                 output[11] = X##ke; \
   1511                 if (laneCount < 14) { \
   1512                     state[12] = X##ki; \
   1513                     if (laneCount >= 13) { \
   1514                         output[12]= X##ki; \
   1515                     } \
   1516                     state[13] = X##ko; \
   1517                     state[14] = X##ku; \
   1518                 } \
   1519                 else { \
   1520                     state[12] = X##ki; \
   1521                     output[12]= X##ki; \
   1522                     state[13] = X##ko; \
   1523                     output[13] = X##ko; \
   1524                     state[14] = X##ku; \
   1525                     if (laneCount >= 15) { \
   1526                         output[14] = X##ku; \
   1527                     } \
   1528                 } \
   1529             } \
   1530         } \
   1531         state[15] = X##ma; \
   1532         state[16] = X##me; \
   1533         state[17] = X##mi; \
   1534         state[18] = X##mo; \
   1535         state[19] = X##mu; \
   1536         state[20] = X##sa; \
   1537         state[21] = X##se; \
   1538         state[22] = X##si; \
   1539         state[23] = X##so; \
   1540         state[24] = X##su; \
   1541     } \
   1542     else { \
   1543         state[ 0] = X##ba; \
   1544         output[ 0] = X##ba; \
   1545         state[ 1] = X##be; \
   1546         output[ 1] = X##be; \
   1547         state[ 2] = X##bi; \
   1548         output[ 2] = X##bi; \
   1549         state[ 3] = X##bo; \
   1550         output[ 3] = X##bo; \
   1551         state[ 4] = X##bu; \
   1552         output[ 4] = X##bu; \
   1553         state[ 5] = X##ga; \
   1554         output[ 5] = X##ga; \
   1555         state[ 6] = X##ge; \
   1556         output[ 6] = X##ge; \
   1557         state[ 7] = X##gi; \
   1558         output[ 7] = X##gi; \
   1559         state[ 8] = X##go; \
   1560         output[ 8] = X##go; \
   1561         state[ 9] = X##gu; \
   1562         output[ 9] = X##gu; \
   1563         state[10] = X##ka; \
   1564         output[10] = X##ka; \
   1565         state[11] = X##ke; \
   1566         output[11] = X##ke; \
   1567         state[12] = X##ki; \
   1568         output[12]= X##ki; \
   1569         state[13] = X##ko; \
   1570         output[13] = X##ko; \
   1571         state[14] = X##ku; \
   1572         output[14] = X##ku; \
   1573         state[15] = X##ma; \
   1574         output[15] = X##ma; \
   1575         if (laneCount < 24) { \
   1576             if (laneCount < 20) { \
   1577                 if (laneCount < 18) { \
   1578                     state[16] = X##me; \
   1579                     if (laneCount >= 17) { \
   1580                         output[16] = X##me; \
   1581                     } \
   1582                     state[17] = X##mi; \
   1583                     state[18] = X##mo; \
   1584                 } \
   1585                 else { \
   1586                     state[16] = X##me; \
   1587                     output[16] = X##me; \
   1588                     state[17] = X##mi; \
   1589                     output[17] = X##mi; \
   1590                     state[18] = X##mo; \
   1591                     if (laneCount >= 19) { \
   1592                         output[18] = X##mo; \
   1593                     } \
   1594                 } \
   1595                 state[19] = X##mu; \
   1596                 state[20] = X##sa; \
   1597                 state[21] = X##se; \
   1598                 state[22] = X##si; \
   1599             } \
   1600             else { \
   1601                 state[16] = X##me; \
   1602                 output[16] = X##me; \
   1603                 state[17] = X##mi; \
   1604                 output[17] = X##mi; \
   1605                 state[18] = X##mo; \
   1606                 output[18] = X##mo; \
   1607                 state[19] = X##mu; \
   1608                 output[19] = X##mu; \
   1609                 if (laneCount < 22) { \
   1610                     state[20] = X##sa; \
   1611                     if (laneCount >= 21) { \
   1612                         output[20] = X##sa; \
   1613                     } \
   1614                     state[21] = X##se; \
   1615                     state[22] = X##si; \
   1616                 } \
   1617                 else { \
   1618                     state[20] = X##sa; \
   1619                     output[20] = X##sa; \
   1620                     state[21] = X##se; \
   1621                     output[21] = X##se; \
   1622                     state[22] = X##si; \
   1623                     if (laneCount >= 23) { \
   1624                         output[22] = X##si; \
   1625                     } \
   1626                 } \
   1627             } \
   1628             state[23] = X##so; \
   1629             state[24] = X##su; \
   1630         } \
   1631         else { \
   1632             state[16] = X##me; \
   1633             output[16] = X##me; \
   1634             state[17] = X##mi; \
   1635             output[17] = X##mi; \
   1636             state[18] = X##mo; \
   1637             output[18] = X##mo; \
   1638             state[19] = X##mu; \
   1639             output[19] = X##mu; \
   1640             state[20] = X##sa; \
   1641             output[20] = X##sa; \
   1642             state[21] = X##se; \
   1643             output[21] = X##se; \
   1644             state[22] = X##si; \
   1645             output[22] = X##si; \
   1646             state[23] = X##so; \
   1647             output[23] = X##so; \
   1648             state[24] = X##su; \
   1649             if (laneCount >= 25) { \
   1650                 output[24] = X##su; \
   1651             } \
   1652         } \
   1653     }
   1654 
   1655 #define output(X, output, laneCount) \
   1656     if (laneCount < 16) { \
   1657         if (laneCount < 8) { \
   1658             if (laneCount < 4) { \
   1659                 if (laneCount < 2) { \
   1660                     if (laneCount >= 1) { \
   1661                         output[ 0] = X##ba; \
   1662                     } \
   1663                 } \
   1664                 else { \
   1665                     output[ 0] = X##ba; \
   1666                     output[ 1] = X##be; \
   1667                     if (laneCount >= 3) { \
   1668                         output[ 2] = X##bi; \
   1669                     } \
   1670                 } \
   1671             } \
   1672             else { \
   1673                 output[ 0] = X##ba; \
   1674                 output[ 1] = X##be; \
   1675                 output[ 2] = X##bi; \
   1676                 output[ 3] = X##bo; \
   1677                 if (laneCount < 6) { \
   1678                     if (laneCount >= 5) { \
   1679                         output[ 4] = X##bu; \
   1680                     } \
   1681                 } \
   1682                 else { \
   1683                     output[ 4] = X##bu; \
   1684                     output[ 5] = X##ga; \
   1685                     if (laneCount >= 7) { \
   1686                         output[ 6] = X##ge; \
   1687                     } \
   1688                 } \
   1689             } \
   1690         } \
   1691         else { \
   1692             output[ 0] = X##ba; \
   1693             output[ 1] = X##be; \
   1694             output[ 2] = X##bi; \
   1695             output[ 3] = X##bo; \
   1696             output[ 4] = X##bu; \
   1697             output[ 5] = X##ga; \
   1698             output[ 6] = X##ge; \
   1699             output[ 7] = X##gi; \
   1700             if (laneCount < 12) { \
   1701                 if (laneCount < 10) { \
   1702                     if (laneCount >= 9) { \
   1703                         output[ 8] = X##go; \
   1704                     } \
   1705                 } \
   1706                 else { \
   1707                     output[ 8] = X##go; \
   1708                     output[ 9] = X##gu; \
   1709                     if (laneCount >= 11) { \
   1710                         output[10] = X##ka; \
   1711                     } \
   1712                 } \
   1713             } \
   1714             else { \
   1715                 output[ 8] = X##go; \
   1716                 output[ 9] = X##gu; \
   1717                 output[10] = X##ka; \
   1718                 output[11] = X##ke; \
   1719                 if (laneCount < 14) { \
   1720                     if (laneCount >= 13) { \
   1721                         output[12] = X##ki; \
   1722                     } \
   1723                 } \
   1724                 else { \
   1725                     output[12] = X##ki; \
   1726                     output[13] = X##ko; \
   1727                     if (laneCount >= 15) { \
   1728                         output[14] = X##ku; \
   1729                     } \
   1730                 } \
   1731             } \
   1732         } \
   1733     } \
   1734     else { \
   1735         output[ 0] = X##ba; \
   1736         output[ 1] = X##be; \
   1737         output[ 2] = X##bi; \
   1738         output[ 3] = X##bo; \
   1739         output[ 4] = X##bu; \
   1740         output[ 5] = X##ga; \
   1741         output[ 6] = X##ge; \
   1742         output[ 7] = X##gi; \
   1743         output[ 8] = X##go; \
   1744         output[ 9] = X##gu; \
   1745         output[10] = X##ka; \
   1746         output[11] = X##ke; \
   1747         output[12] = X##ki; \
   1748         output[13] = X##ko; \
   1749         output[14] = X##ku; \
   1750         output[15] = X##ma; \
   1751         if (laneCount < 24) { \
   1752             if (laneCount < 20) { \
   1753                 if (laneCount < 18) { \
   1754                     if (laneCount >= 17) { \
   1755                         output[16] = X##me; \
   1756                     } \
   1757                 } \
   1758                 else { \
   1759                     output[16] = X##me; \
   1760                     output[17] = X##mi; \
   1761                     if (laneCount >= 19) { \
   1762                         output[18] = X##mo; \
   1763                     } \
   1764                 } \
   1765             } \
   1766             else { \
   1767                 output[16] = X##me; \
   1768                 output[17] = X##mi; \
   1769                 output[18] = X##mo; \
   1770                 output[19] = X##mu; \
   1771                 if (laneCount < 22) { \
   1772                     if (laneCount >= 21) { \
   1773                         output[20] = X##sa; \
   1774                     } \
   1775                 } \
   1776                 else { \
   1777                     output[20] = X##sa; \
   1778                     output[21] = X##se; \
   1779                     if (laneCount >= 23) { \
   1780                         output[22] = X##si; \
   1781                     } \
   1782                 } \
   1783             } \
   1784         } \
   1785         else { \
   1786             output[16] = X##me; \
   1787             output[17] = X##mi; \
   1788             output[18] = X##mo; \
   1789             output[19] = X##mu; \
   1790             output[20] = X##sa; \
   1791             output[21] = X##se; \
   1792             output[22] = X##si; \
   1793             output[23] = X##so; \
   1794             if (laneCount >= 25) { \
   1795                 output[24] = X##su; \
   1796             } \
   1797         } \
   1798     }
   1799 
   1800 #define wrapOne(X, input, output, index, name) \
   1801     X##name ^= input[index]; \
   1802     output[index] = X##name;
   1803 
   1804 #define wrapOneInvert(X, input, output, index, name) \
   1805     X##name ^= input[index]; \
   1806     output[index] = X##name;
   1807 
   1808 #define unwrapOne(X, input, output, index, name) \
   1809     output[index] = input[index] ^ X##name; \
   1810     X##name ^= output[index];
   1811 
   1812 #define unwrapOneInvert(X, input, output, index, name) \
   1813     output[index] = input[index] ^ X##name; \
   1814     X##name ^= output[index];
   1815 
   1816 #endif
   1817 
   1818 #define wrap(X, input, output, laneCount, trailingBits) \
   1819     if (laneCount < 16) { \
   1820         if (laneCount < 8) { \
   1821             if (laneCount < 4) { \
   1822                 if (laneCount < 2) { \
   1823                     if (laneCount < 1) { \
   1824                         X##ba ^= trailingBits; \
   1825                     } \
   1826                     else { \
   1827                         wrapOne(X, input, output, 0, ba) \
   1828                         X##be ^= trailingBits; \
   1829                     } \
   1830                 } \
   1831                 else { \
   1832                     wrapOne(X, input, output, 0, ba) \
   1833                     wrapOneInvert(X, input, output, 1, be) \
   1834                     if (laneCount < 3) { \
   1835                         X##bi ^= trailingBits; \
   1836                     } \
   1837                     else { \
   1838                         wrapOneInvert(X, input, output, 2, bi) \
   1839                         X##bo ^= trailingBits; \
   1840                     } \
   1841                 } \
   1842             } \
   1843             else { \
   1844                 wrapOne(X, input, output, 0, ba) \
   1845                 wrapOneInvert(X, input, output, 1, be) \
   1846                 wrapOneInvert(X, input, output, 2, bi) \
   1847                 wrapOne(X, input, output, 3, bo) \
   1848                 if (laneCount < 6) { \
   1849                     if (laneCount < 5) { \
   1850                         X##bu ^= trailingBits; \
   1851                     } \
   1852                     else { \
   1853                         wrapOne(X, input, output, 4, bu) \
   1854                         X##ga ^= trailingBits; \
   1855                     } \
   1856                 } \
   1857                 else { \
   1858                     wrapOne(X, input, output, 4, bu) \
   1859                     wrapOne(X, input, output, 5, ga) \
   1860                     if (laneCount < 7) { \
   1861                         X##ge ^= trailingBits; \
   1862                     } \
   1863                     else { \
   1864                         wrapOne(X, input, output, 6, ge) \
   1865                         X##gi ^= trailingBits; \
   1866                     } \
   1867                 } \
   1868             } \
   1869         } \
   1870         else { \
   1871             wrapOne(X, input, output, 0, ba) \
   1872             wrapOneInvert(X, input, output, 1, be) \
   1873             wrapOneInvert(X, input, output, 2, bi) \
   1874             wrapOne(X, input, output, 3, bo) \
   1875             wrapOne(X, input, output, 4, bu) \
   1876             wrapOne(X, input, output, 5, ga) \
   1877             wrapOne(X, input, output, 6, ge) \
   1878             wrapOne(X, input, output, 7, gi) \
   1879             if (laneCount < 12) { \
   1880                 if (laneCount < 10) { \
   1881                     if (laneCount < 9) { \
   1882                         X##go ^= trailingBits; \
   1883                     } \
   1884                     else { \
   1885                         wrapOneInvert(X, input, output, 8, go) \
   1886                         X##gu ^= trailingBits; \
   1887                     } \
   1888                 } \
   1889                 else { \
   1890                     wrapOneInvert(X, input, output, 8, go) \
   1891                     wrapOne(X, input, output, 9, gu) \
   1892                     if (laneCount < 11) { \
   1893                         X##ka ^= trailingBits; \
   1894                     } \
   1895                     else { \
   1896                         wrapOne(X, input, output, 10, ka) \
   1897                         X##ke ^= trailingBits; \
   1898                     } \
   1899                 } \
   1900             } \
   1901             else { \
   1902                 wrapOneInvert(X, input, output, 8, go) \
   1903                 wrapOne(X, input, output, 9, gu) \
   1904                 wrapOne(X, input, output, 10, ka) \
   1905                 wrapOne(X, input, output, 11, ke) \
   1906                 if (laneCount < 14) { \
   1907                     if (laneCount < 13) { \
   1908                         X##ki ^= trailingBits; \
   1909                     } \
   1910                     else { \
   1911                         wrapOneInvert(X, input, output, 12, ki) \
   1912                         X##ko ^= trailingBits; \
   1913                     } \
   1914                 } \
   1915                 else { \
   1916                     wrapOneInvert(X, input, output, 12, ki) \
   1917                     wrapOne(X, input, output, 13, ko) \
   1918                     if (laneCount < 15) { \
   1919                         X##ku ^= trailingBits; \
   1920                     } \
   1921                     else { \
   1922                         wrapOne(X, input, output, 14, ku) \
   1923                         X##ma ^= trailingBits; \
   1924                     } \
   1925                 } \
   1926             } \
   1927         } \
   1928     } \
   1929     else { \
   1930         wrapOne(X, input, output, 0, ba) \
   1931         wrapOneInvert(X, input, output, 1, be) \
   1932         wrapOneInvert(X, input, output, 2, bi) \
   1933         wrapOne(X, input, output, 3, bo) \
   1934         wrapOne(X, input, output, 4, bu) \
   1935         wrapOne(X, input, output, 5, ga) \
   1936         wrapOne(X, input, output, 6, ge) \
   1937         wrapOne(X, input, output, 7, gi) \
   1938         wrapOneInvert(X, input, output, 8, go) \
   1939         wrapOne(X, input, output, 9, gu) \
   1940         wrapOne(X, input, output, 10, ka) \
   1941         wrapOne(X, input, output, 11, ke) \
   1942         wrapOneInvert(X, input, output, 12, ki) \
   1943         wrapOne(X, input, output, 13, ko) \
   1944         wrapOne(X, input, output, 14, ku) \
   1945         wrapOne(X, input, output, 15, ma) \
   1946         if (laneCount < 24) { \
   1947             if (laneCount < 20) { \
   1948                 if (laneCount < 18) { \
   1949                     if (laneCount < 17) { \
   1950                         X##me ^= trailingBits; \
   1951                     } \
   1952                     else { \
   1953                         wrapOne(X, input, output, 16, me) \
   1954                         X##mi ^= trailingBits; \
   1955                     } \
   1956                 } \
   1957                 else { \
   1958                     wrapOne(X, input, output, 16, me) \
   1959                     wrapOneInvert(X, input, output, 17, mi) \
   1960                     if (laneCount < 19) { \
   1961                         X##mo ^= trailingBits; \
   1962                     } \
   1963                     else { \
   1964                         wrapOne(X, input, output, 18, mo) \
   1965                         X##mu ^= trailingBits; \
   1966                     } \
   1967                 } \
   1968             } \
   1969             else { \
   1970                 wrapOne(X, input, output, 16, me) \
   1971                 wrapOneInvert(X, input, output, 17, mi) \
   1972                 wrapOne(X, input, output, 18, mo) \
   1973                 wrapOne(X, input, output, 19, mu) \
   1974                 if (laneCount < 22) { \
   1975                     if (laneCount < 21) { \
   1976                         X##sa ^= trailingBits; \
   1977                     } \
   1978                     else { \
   1979                         wrapOneInvert(X, input, output, 20, sa) \
   1980                         X##se ^= trailingBits; \
   1981                     } \
   1982                 } \
   1983                 else { \
   1984                     wrapOneInvert(X, input, output, 20, sa) \
   1985                     wrapOne(X, input, output, 21, se) \
   1986                     if (laneCount < 23) { \
   1987                         X##si ^= trailingBits; \
   1988                     } \
   1989                     else { \
   1990                         wrapOne(X, input, output, 22, si) \
   1991                         X##so ^= trailingBits; \
   1992                     } \
   1993                 } \
   1994             } \
   1995         } \
   1996         else { \
   1997             wrapOne(X, input, output, 16, me) \
   1998             wrapOneInvert(X, input, output, 17, mi) \
   1999             wrapOne(X, input, output, 18, mo) \
   2000             wrapOne(X, input, output, 19, mu) \
   2001             wrapOneInvert(X, input, output, 20, sa) \
   2002             wrapOne(X, input, output, 21, se) \
   2003             wrapOne(X, input, output, 22, si) \
   2004             wrapOne(X, input, output, 23, so) \
   2005             if (laneCount < 25) { \
   2006                 X##su ^= trailingBits; \
   2007             } \
   2008             else { \
   2009                 wrapOne(X, input, output, 24, su) \
   2010             } \
   2011         } \
   2012     }
   2013 
   2014 #define unwrap(X, input, output, laneCount, trailingBits) \
   2015     if (laneCount < 16) { \
   2016         if (laneCount < 8) { \
   2017             if (laneCount < 4) { \
   2018                 if (laneCount < 2) { \
   2019                     if (laneCount < 1) { \
   2020                         X##ba ^= trailingBits; \
   2021                     } \
   2022                     else { \
   2023                         unwrapOne(X, input, output, 0, ba) \
   2024                         X##be ^= trailingBits; \
   2025                     } \
   2026                 } \
   2027                 else { \
   2028                     unwrapOne(X, input, output, 0, ba) \
   2029                     unwrapOneInvert(X, input, output, 1, be) \
   2030                     if (laneCount < 3) { \
   2031                         X##bi ^= trailingBits; \
   2032                     } \
   2033                     else { \
   2034                         unwrapOneInvert(X, input, output, 2, bi) \
   2035                         X##bo ^= trailingBits; \
   2036                     } \
   2037                 } \
   2038             } \
   2039             else { \
   2040                 unwrapOne(X, input, output, 0, ba) \
   2041                 unwrapOneInvert(X, input, output, 1, be) \
   2042                 unwrapOneInvert(X, input, output, 2, bi) \
   2043                 unwrapOne(X, input, output, 3, bo) \
   2044                 if (laneCount < 6) { \
   2045                     if (laneCount < 5) { \
   2046                         X##bu ^= trailingBits; \
   2047                     } \
   2048                     else { \
   2049                         unwrapOne(X, input, output, 4, bu) \
   2050                         X##ga ^= trailingBits; \
   2051                     } \
   2052                 } \
   2053                 else { \
   2054                     unwrapOne(X, input, output, 4, bu) \
   2055                     unwrapOne(X, input, output, 5, ga) \
   2056                     if (laneCount < 7) { \
   2057                         X##ge ^= trailingBits; \
   2058                     } \
   2059                     else { \
   2060                         unwrapOne(X, input, output, 6, ge) \
   2061                         X##gi ^= trailingBits; \
   2062                     } \
   2063                 } \
   2064             } \
   2065         } \
   2066         else { \
   2067             unwrapOne(X, input, output, 0, ba) \
   2068             unwrapOneInvert(X, input, output, 1, be) \
   2069             unwrapOneInvert(X, input, output, 2, bi) \
   2070             unwrapOne(X, input, output, 3, bo) \
   2071             unwrapOne(X, input, output, 4, bu) \
   2072             unwrapOne(X, input, output, 5, ga) \
   2073             unwrapOne(X, input, output, 6, ge) \
   2074             unwrapOne(X, input, output, 7, gi) \
   2075             if (laneCount < 12) { \
   2076                 if (laneCount < 10) { \
   2077                     if (laneCount < 9) { \
   2078                         X##go ^= trailingBits; \
   2079                     } \
   2080                     else { \
   2081                         unwrapOneInvert(X, input, output, 8, go) \
   2082                         X##gu ^= trailingBits; \
   2083                     } \
   2084                 } \
   2085                 else { \
   2086                     unwrapOneInvert(X, input, output, 8, go) \
   2087                     unwrapOne(X, input, output, 9, gu) \
   2088                     if (laneCount < 11) { \
   2089                         X##ka ^= trailingBits; \
   2090                     } \
   2091                     else { \
   2092                         unwrapOne(X, input, output, 10, ka) \
   2093                         X##ke ^= trailingBits; \
   2094                     } \
   2095                 } \
   2096             } \
   2097             else { \
   2098                 unwrapOneInvert(X, input, output, 8, go) \
   2099                 unwrapOne(X, input, output, 9, gu) \
   2100                 unwrapOne(X, input, output, 10, ka) \
   2101                 unwrapOne(X, input, output, 11, ke) \
   2102                 if (laneCount < 14) { \
   2103                     if (laneCount < 13) { \
   2104                         X##ki ^= trailingBits; \
   2105                     } \
   2106                     else { \
   2107                         unwrapOneInvert(X, input, output, 12, ki) \
   2108                         X##ko ^= trailingBits; \
   2109                     } \
   2110                 } \
   2111                 else { \
   2112                     unwrapOneInvert(X, input, output, 12, ki) \
   2113                     unwrapOne(X, input, output, 13, ko) \
   2114                     if (laneCount < 15) { \
   2115                         X##ku ^= trailingBits; \
   2116                     } \
   2117                     else { \
   2118                         unwrapOne(X, input, output, 14, ku) \
   2119                         X##ma ^= trailingBits; \
   2120                     } \
   2121                 } \
   2122             } \
   2123         } \
   2124     } \
   2125     else { \
   2126         unwrapOne(X, input, output, 0, ba) \
   2127         unwrapOneInvert(X, input, output, 1, be) \
   2128         unwrapOneInvert(X, input, output, 2, bi) \
   2129         unwrapOne(X, input, output, 3, bo) \
   2130         unwrapOne(X, input, output, 4, bu) \
   2131         unwrapOne(X, input, output, 5, ga) \
   2132         unwrapOne(X, input, output, 6, ge) \
   2133         unwrapOne(X, input, output, 7, gi) \
   2134         unwrapOneInvert(X, input, output, 8, go) \
   2135         unwrapOne(X, input, output, 9, gu) \
   2136         unwrapOne(X, input, output, 10, ka) \
   2137         unwrapOne(X, input, output, 11, ke) \
   2138         unwrapOneInvert(X, input, output, 12, ki) \
   2139         unwrapOne(X, input, output, 13, ko) \
   2140         unwrapOne(X, input, output, 14, ku) \
   2141         unwrapOne(X, input, output, 15, ma) \
   2142         if (laneCount < 24) { \
   2143             if (laneCount < 20) { \
   2144                 if (laneCount < 18) { \
   2145                     if (laneCount < 17) { \
   2146                         X##me ^= trailingBits; \
   2147                     } \
   2148                     else { \
   2149                         unwrapOne(X, input, output, 16, me) \
   2150                         X##mi ^= trailingBits; \
   2151                     } \
   2152                 } \
   2153                 else { \
   2154                     unwrapOne(X, input, output, 16, me) \
   2155                     unwrapOneInvert(X, input, output, 17, mi) \
   2156                     if (laneCount < 19) { \
   2157                         X##mo ^= trailingBits; \
   2158                     } \
   2159                     else { \
   2160                         unwrapOne(X, input, output, 18, mo) \
   2161                         X##mu ^= trailingBits; \
   2162                     } \
   2163                 } \
   2164             } \
   2165             else { \
   2166                 unwrapOne(X, input, output, 16, me) \
   2167                 unwrapOneInvert(X, input, output, 17, mi) \
   2168                 unwrapOne(X, input, output, 18, mo) \
   2169                 unwrapOne(X, input, output, 19, mu) \
   2170                 if (laneCount < 22) { \
   2171                     if (laneCount < 21) { \
   2172                         X##sa ^= trailingBits; \
   2173                     } \
   2174                     else { \
   2175                         unwrapOneInvert(X, input, output, 20, sa) \
   2176                         X##se ^= trailingBits; \
   2177                     } \
   2178                 } \
   2179                 else { \
   2180                     unwrapOneInvert(X, input, output, 20, sa) \
   2181                     unwrapOne(X, input, output, 21, se) \
   2182                     if (laneCount < 23) { \
   2183                         X##si ^= trailingBits; \
   2184                     } \
   2185                     else { \
   2186                         unwrapOne(X, input, output, 22, si) \
   2187                         X##so ^= trailingBits; \
   2188                     } \
   2189                 } \
   2190             } \
   2191         } \
   2192         else { \
   2193             unwrapOne(X, input, output, 16, me) \
   2194             unwrapOneInvert(X, input, output, 17, mi) \
   2195             unwrapOne(X, input, output, 18, mo) \
   2196             unwrapOne(X, input, output, 19, mu) \
   2197             unwrapOneInvert(X, input, output, 20, sa) \
   2198             unwrapOne(X, input, output, 21, se) \
   2199             unwrapOne(X, input, output, 22, si) \
   2200             unwrapOne(X, input, output, 23, so) \
   2201             if (laneCount < 25) { \
   2202                 X##su ^= trailingBits; \
   2203             } \
   2204             else { \
   2205                 unwrapOne(X, input, output, 24, su) \
   2206             } \
   2207         } \
   2208     }
   2209