Home | History | Annotate | Download | only in Modules
      1 /* MD5 module */
      2 
      3 /* This module provides an interface to the MD5 algorithm */
      4 
      5 /* See below for information about the original code this module was
      6    based upon. Additional work performed by:
      7 
      8    Andrew Kuchling (amk (at) amk.ca)
      9    Greg Stein (gstein (at) lyra.org)
     10    Trevor Perrin (trevp (at) trevp.net)
     11 
     12    Copyright (C) 2005-2007   Gregory P. Smith (greg (at) krypto.org)
     13    Licensed to PSF under a Contributor Agreement.
     14 
     15 */
     16 
     17 /* MD5 objects */
     18 
     19 #include "Python.h"
     20 #include "hashlib.h"
     21 #include "pystrhex.h"
     22 
     23 /*[clinic input]
     24 module _md5
     25 class MD5Type "MD5object *" "&PyType_Type"
     26 [clinic start generated code]*/
     27 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/
     28 
     29 /* Some useful types */
     30 
     31 #if SIZEOF_INT == 4
     32 typedef unsigned int MD5_INT32; /* 32-bit integer */
     33 typedef long long MD5_INT64; /* 64-bit integer */
     34 #else
     35 /* not defined. compilation will die. */
     36 #endif
     37 
     38 /* The MD5 block size and message digest sizes, in bytes */
     39 
     40 #define MD5_BLOCKSIZE    64
     41 #define MD5_DIGESTSIZE   16
     42 
     43 /* The structure for storing MD5 info */
     44 
     45 struct md5_state {
     46     MD5_INT64 length;
     47     MD5_INT32 state[4], curlen;
     48     unsigned char buf[MD5_BLOCKSIZE];
     49 };
     50 
     51 typedef struct {
     52     PyObject_HEAD
     53 
     54     struct md5_state hash_state;
     55 } MD5object;
     56 
     57 #include "clinic/md5module.c.h"
     58 
     59 /* ------------------------------------------------------------------------
     60  *
     61  * This code for the MD5 algorithm was noted as public domain. The
     62  * original headers are pasted below.
     63  *
     64  * Several changes have been made to make it more compatible with the
     65  * Python environment and desired interface.
     66  *
     67  */
     68 
     69 /* LibTomCrypt, modular cryptographic library -- Tom St Denis
     70  *
     71  * LibTomCrypt is a library that provides various cryptographic
     72  * algorithms in a highly modular and flexible manner.
     73  *
     74  * The library is free for all purposes without any express
     75  * guarantee it works.
     76  *
     77  * Tom St Denis, tomstdenis (at) gmail.com, http://libtom.org
     78  */
     79 
     80 /* rotate the hard way (platform optimizations could be done) */
     81 #define ROLc(x, y) ( (((unsigned long)(x)<<(unsigned long)((y)&31)) | (((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL)
     82 
     83 /* Endian Neutral macros that work on all platforms */
     84 
     85 #define STORE32L(x, y)                                                                     \
     86      { (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255);   \
     87        (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); }
     88 
     89 #define LOAD32L(x, y)                            \
     90      { x = ((unsigned long)((y)[3] & 255)<<24) | \
     91            ((unsigned long)((y)[2] & 255)<<16) | \
     92            ((unsigned long)((y)[1] & 255)<<8)  | \
     93            ((unsigned long)((y)[0] & 255)); }
     94 
     95 #define STORE64L(x, y)                                                                     \
     96      { (y)[7] = (unsigned char)(((x)>>56)&255); (y)[6] = (unsigned char)(((x)>>48)&255);   \
     97        (y)[5] = (unsigned char)(((x)>>40)&255); (y)[4] = (unsigned char)(((x)>>32)&255);   \
     98        (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255);   \
     99        (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); }
    100 
    101 
    102 /* MD5 macros */
    103 
    104 #define F(x,y,z)  (z ^ (x & (y ^ z)))
    105 #define G(x,y,z)  (y ^ (z & (y ^ x)))
    106 #define H(x,y,z)  (x^y^z)
    107 #define I(x,y,z)  (y^(x|(~z)))
    108 
    109 #define FF(a,b,c,d,M,s,t) \
    110     a = (a + F(b,c,d) + M + t); a = ROLc(a, s) + b;
    111 
    112 #define GG(a,b,c,d,M,s,t) \
    113     a = (a + G(b,c,d) + M + t); a = ROLc(a, s) + b;
    114 
    115 #define HH(a,b,c,d,M,s,t) \
    116     a = (a + H(b,c,d) + M + t); a = ROLc(a, s) + b;
    117 
    118 #define II(a,b,c,d,M,s,t) \
    119     a = (a + I(b,c,d) + M + t); a = ROLc(a, s) + b;
    120 
    121 
    122 static void md5_compress(struct md5_state *md5, unsigned char *buf)
    123 {
    124     MD5_INT32 i, W[16], a, b, c, d;
    125 
    126     assert(md5 != NULL);
    127     assert(buf != NULL);
    128 
    129     /* copy the state into 512-bits into W[0..15] */
    130     for (i = 0; i < 16; i++) {
    131         LOAD32L(W[i], buf + (4*i));
    132     }
    133 
    134     /* copy state */
    135     a = md5->state[0];
    136     b = md5->state[1];
    137     c = md5->state[2];
    138     d = md5->state[3];
    139 
    140     FF(a,b,c,d,W[0],7,0xd76aa478UL)
    141     FF(d,a,b,c,W[1],12,0xe8c7b756UL)
    142     FF(c,d,a,b,W[2],17,0x242070dbUL)
    143     FF(b,c,d,a,W[3],22,0xc1bdceeeUL)
    144     FF(a,b,c,d,W[4],7,0xf57c0fafUL)
    145     FF(d,a,b,c,W[5],12,0x4787c62aUL)
    146     FF(c,d,a,b,W[6],17,0xa8304613UL)
    147     FF(b,c,d,a,W[7],22,0xfd469501UL)
    148     FF(a,b,c,d,W[8],7,0x698098d8UL)
    149     FF(d,a,b,c,W[9],12,0x8b44f7afUL)
    150     FF(c,d,a,b,W[10],17,0xffff5bb1UL)
    151     FF(b,c,d,a,W[11],22,0x895cd7beUL)
    152     FF(a,b,c,d,W[12],7,0x6b901122UL)
    153     FF(d,a,b,c,W[13],12,0xfd987193UL)
    154     FF(c,d,a,b,W[14],17,0xa679438eUL)
    155     FF(b,c,d,a,W[15],22,0x49b40821UL)
    156     GG(a,b,c,d,W[1],5,0xf61e2562UL)
    157     GG(d,a,b,c,W[6],9,0xc040b340UL)
    158     GG(c,d,a,b,W[11],14,0x265e5a51UL)
    159     GG(b,c,d,a,W[0],20,0xe9b6c7aaUL)
    160     GG(a,b,c,d,W[5],5,0xd62f105dUL)
    161     GG(d,a,b,c,W[10],9,0x02441453UL)
    162     GG(c,d,a,b,W[15],14,0xd8a1e681UL)
    163     GG(b,c,d,a,W[4],20,0xe7d3fbc8UL)
    164     GG(a,b,c,d,W[9],5,0x21e1cde6UL)
    165     GG(d,a,b,c,W[14],9,0xc33707d6UL)
    166     GG(c,d,a,b,W[3],14,0xf4d50d87UL)
    167     GG(b,c,d,a,W[8],20,0x455a14edUL)
    168     GG(a,b,c,d,W[13],5,0xa9e3e905UL)
    169     GG(d,a,b,c,W[2],9,0xfcefa3f8UL)
    170     GG(c,d,a,b,W[7],14,0x676f02d9UL)
    171     GG(b,c,d,a,W[12],20,0x8d2a4c8aUL)
    172     HH(a,b,c,d,W[5],4,0xfffa3942UL)
    173     HH(d,a,b,c,W[8],11,0x8771f681UL)
    174     HH(c,d,a,b,W[11],16,0x6d9d6122UL)
    175     HH(b,c,d,a,W[14],23,0xfde5380cUL)
    176     HH(a,b,c,d,W[1],4,0xa4beea44UL)
    177     HH(d,a,b,c,W[4],11,0x4bdecfa9UL)
    178     HH(c,d,a,b,W[7],16,0xf6bb4b60UL)
    179     HH(b,c,d,a,W[10],23,0xbebfbc70UL)
    180     HH(a,b,c,d,W[13],4,0x289b7ec6UL)
    181     HH(d,a,b,c,W[0],11,0xeaa127faUL)
    182     HH(c,d,a,b,W[3],16,0xd4ef3085UL)
    183     HH(b,c,d,a,W[6],23,0x04881d05UL)
    184     HH(a,b,c,d,W[9],4,0xd9d4d039UL)
    185     HH(d,a,b,c,W[12],11,0xe6db99e5UL)
    186     HH(c,d,a,b,W[15],16,0x1fa27cf8UL)
    187     HH(b,c,d,a,W[2],23,0xc4ac5665UL)
    188     II(a,b,c,d,W[0],6,0xf4292244UL)
    189     II(d,a,b,c,W[7],10,0x432aff97UL)
    190     II(c,d,a,b,W[14],15,0xab9423a7UL)
    191     II(b,c,d,a,W[5],21,0xfc93a039UL)
    192     II(a,b,c,d,W[12],6,0x655b59c3UL)
    193     II(d,a,b,c,W[3],10,0x8f0ccc92UL)
    194     II(c,d,a,b,W[10],15,0xffeff47dUL)
    195     II(b,c,d,a,W[1],21,0x85845dd1UL)
    196     II(a,b,c,d,W[8],6,0x6fa87e4fUL)
    197     II(d,a,b,c,W[15],10,0xfe2ce6e0UL)
    198     II(c,d,a,b,W[6],15,0xa3014314UL)
    199     II(b,c,d,a,W[13],21,0x4e0811a1UL)
    200     II(a,b,c,d,W[4],6,0xf7537e82UL)
    201     II(d,a,b,c,W[11],10,0xbd3af235UL)
    202     II(c,d,a,b,W[2],15,0x2ad7d2bbUL)
    203     II(b,c,d,a,W[9],21,0xeb86d391UL)
    204 
    205     md5->state[0] = md5->state[0] + a;
    206     md5->state[1] = md5->state[1] + b;
    207     md5->state[2] = md5->state[2] + c;
    208     md5->state[3] = md5->state[3] + d;
    209 }
    210 
    211 
    212 /**
    213    Initialize the hash state
    214    @param sha1   The hash state you wish to initialize
    215 */
    216 static void
    217 md5_init(struct md5_state *md5)
    218 {
    219     assert(md5 != NULL);
    220     md5->state[0] = 0x67452301UL;
    221     md5->state[1] = 0xefcdab89UL;
    222     md5->state[2] = 0x98badcfeUL;
    223     md5->state[3] = 0x10325476UL;
    224     md5->curlen = 0;
    225     md5->length = 0;
    226 }
    227 
    228 /**
    229    Process a block of memory though the hash
    230    @param sha1   The hash state
    231    @param in     The data to hash
    232    @param inlen  The length of the data (octets)
    233 */
    234 static void
    235 md5_process(struct md5_state *md5, const unsigned char *in, Py_ssize_t inlen)
    236 {
    237     Py_ssize_t n;
    238 
    239     assert(md5 != NULL);
    240     assert(in != NULL);
    241     assert(md5->curlen <= sizeof(md5->buf));
    242 
    243     while (inlen > 0) {
    244         if (md5->curlen == 0 && inlen >= MD5_BLOCKSIZE) {
    245            md5_compress(md5, (unsigned char *)in);
    246            md5->length    += MD5_BLOCKSIZE * 8;
    247            in             += MD5_BLOCKSIZE;
    248            inlen          -= MD5_BLOCKSIZE;
    249         } else {
    250            n = Py_MIN(inlen, (Py_ssize_t)(MD5_BLOCKSIZE - md5->curlen));
    251            memcpy(md5->buf + md5->curlen, in, (size_t)n);
    252            md5->curlen    += (MD5_INT32)n;
    253            in             += n;
    254            inlen          -= n;
    255            if (md5->curlen == MD5_BLOCKSIZE) {
    256               md5_compress(md5, md5->buf);
    257               md5->length += 8*MD5_BLOCKSIZE;
    258               md5->curlen = 0;
    259            }
    260        }
    261     }
    262 }
    263 
    264 /**
    265    Terminate the hash to get the digest
    266    @param sha1  The hash state
    267    @param out [out] The destination of the hash (16 bytes)
    268 */
    269 static void
    270 md5_done(struct md5_state *md5, unsigned char *out)
    271 {
    272     int i;
    273 
    274     assert(md5 != NULL);
    275     assert(out != NULL);
    276     assert(md5->curlen < sizeof(md5->buf));
    277 
    278     /* increase the length of the message */
    279     md5->length += md5->curlen * 8;
    280 
    281     /* append the '1' bit */
    282     md5->buf[md5->curlen++] = (unsigned char)0x80;
    283 
    284     /* if the length is currently above 56 bytes we append zeros
    285      * then compress.  Then we can fall back to padding zeros and length
    286      * encoding like normal.
    287      */
    288     if (md5->curlen > 56) {
    289         while (md5->curlen < 64) {
    290             md5->buf[md5->curlen++] = (unsigned char)0;
    291         }
    292         md5_compress(md5, md5->buf);
    293         md5->curlen = 0;
    294     }
    295 
    296     /* pad upto 56 bytes of zeroes */
    297     while (md5->curlen < 56) {
    298         md5->buf[md5->curlen++] = (unsigned char)0;
    299     }
    300 
    301     /* store length */
    302     STORE64L(md5->length, md5->buf+56);
    303     md5_compress(md5, md5->buf);
    304 
    305     /* copy output */
    306     for (i = 0; i < 4; i++) {
    307         STORE32L(md5->state[i], out+(4*i));
    308     }
    309 }
    310 
    311 /* .Source: /cvs/libtom/libtomcrypt/src/hashes/md5.c,v $ */
    312 /* .Revision: 1.10 $ */
    313 /* .Date: 2007/05/12 14:25:28 $ */
    314 
    315 /*
    316  * End of copied MD5 code.
    317  *
    318  * ------------------------------------------------------------------------
    319  */
    320 
    321 static PyTypeObject MD5type;
    322 
    323 
    324 static MD5object *
    325 newMD5object(void)
    326 {
    327     return (MD5object *)PyObject_New(MD5object, &MD5type);
    328 }
    329 
    330 
    331 /* Internal methods for a hash object */
    332 
    333 static void
    334 MD5_dealloc(PyObject *ptr)
    335 {
    336     PyObject_Del(ptr);
    337 }
    338 
    339 
    340 /* External methods for a hash object */
    341 
    342 /*[clinic input]
    343 MD5Type.copy
    344 
    345 Return a copy of the hash object.
    346 [clinic start generated code]*/
    347 
    348 static PyObject *
    349 MD5Type_copy_impl(MD5object *self)
    350 /*[clinic end generated code: output=596eb36852f02071 input=2c09e6d2493f3079]*/
    351 {
    352     MD5object *newobj;
    353 
    354     if ((newobj = newMD5object())==NULL)
    355         return NULL;
    356 
    357     newobj->hash_state = self->hash_state;
    358     return (PyObject *)newobj;
    359 }
    360 
    361 /*[clinic input]
    362 MD5Type.digest
    363 
    364 Return the digest value as a string of binary data.
    365 [clinic start generated code]*/
    366 
    367 static PyObject *
    368 MD5Type_digest_impl(MD5object *self)
    369 /*[clinic end generated code: output=eb691dc4190a07ec input=7b96e65389412a34]*/
    370 {
    371     unsigned char digest[MD5_DIGESTSIZE];
    372     struct md5_state temp;
    373 
    374     temp = self->hash_state;
    375     md5_done(&temp, digest);
    376     return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
    377 }
    378 
    379 /*[clinic input]
    380 MD5Type.hexdigest
    381 
    382 Return the digest value as a string of hexadecimal digits.
    383 [clinic start generated code]*/
    384 
    385 static PyObject *
    386 MD5Type_hexdigest_impl(MD5object *self)
    387 /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
    388 {
    389     unsigned char digest[MD5_DIGESTSIZE];
    390     struct md5_state temp;
    391 
    392     /* Get the raw (binary) digest value */
    393     temp = self->hash_state;
    394     md5_done(&temp, digest);
    395 
    396     return _Py_strhex((const char*)digest, MD5_DIGESTSIZE);
    397 }
    398 
    399 /*[clinic input]
    400 MD5Type.update
    401 
    402     obj: object
    403     /
    404 
    405 Update this hash object's state with the provided string.
    406 [clinic start generated code]*/
    407 
    408 static PyObject *
    409 MD5Type_update(MD5object *self, PyObject *obj)
    410 /*[clinic end generated code: output=f6ad168416338423 input=6e1efcd9ecf17032]*/
    411 {
    412     Py_buffer buf;
    413 
    414     GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
    415 
    416     md5_process(&self->hash_state, buf.buf, buf.len);
    417 
    418     PyBuffer_Release(&buf);
    419     Py_INCREF(Py_None);
    420     return Py_None;
    421 }
    422 
    423 static PyMethodDef MD5_methods[] = {
    424     MD5TYPE_COPY_METHODDEF
    425     MD5TYPE_DIGEST_METHODDEF
    426     MD5TYPE_HEXDIGEST_METHODDEF
    427     MD5TYPE_UPDATE_METHODDEF
    428     {NULL,        NULL}         /* sentinel */
    429 };
    430 
    431 static PyObject *
    432 MD5_get_block_size(PyObject *self, void *closure)
    433 {
    434     return PyLong_FromLong(MD5_BLOCKSIZE);
    435 }
    436 
    437 static PyObject *
    438 MD5_get_name(PyObject *self, void *closure)
    439 {
    440     return PyUnicode_FromStringAndSize("md5", 3);
    441 }
    442 
    443 static PyObject *
    444 md5_get_digest_size(PyObject *self, void *closure)
    445 {
    446     return PyLong_FromLong(MD5_DIGESTSIZE);
    447 }
    448 
    449 
    450 static PyGetSetDef MD5_getseters[] = {
    451     {"block_size",
    452      (getter)MD5_get_block_size, NULL,
    453      NULL,
    454      NULL},
    455     {"name",
    456      (getter)MD5_get_name, NULL,
    457      NULL,
    458      NULL},
    459     {"digest_size",
    460      (getter)md5_get_digest_size, NULL,
    461      NULL,
    462      NULL},
    463     {NULL}  /* Sentinel */
    464 };
    465 
    466 static PyTypeObject MD5type = {
    467     PyVarObject_HEAD_INIT(NULL, 0)
    468     "_md5.md5",         /*tp_name*/
    469     sizeof(MD5object),  /*tp_size*/
    470     0,                  /*tp_itemsize*/
    471     /* methods */
    472     MD5_dealloc,        /*tp_dealloc*/
    473     0,                  /*tp_print*/
    474     0,                  /*tp_getattr*/
    475     0,                  /*tp_setattr*/
    476     0,                  /*tp_reserved*/
    477     0,                  /*tp_repr*/
    478     0,                  /*tp_as_number*/
    479     0,                  /*tp_as_sequence*/
    480     0,                  /*tp_as_mapping*/
    481     0,                  /*tp_hash*/
    482     0,                  /*tp_call*/
    483     0,                  /*tp_str*/
    484     0,                  /*tp_getattro*/
    485     0,                  /*tp_setattro*/
    486     0,                  /*tp_as_buffer*/
    487     Py_TPFLAGS_DEFAULT, /*tp_flags*/
    488     0,                  /*tp_doc*/
    489     0,                  /*tp_traverse*/
    490     0,                  /*tp_clear*/
    491     0,                  /*tp_richcompare*/
    492     0,                  /*tp_weaklistoffset*/
    493     0,                  /*tp_iter*/
    494     0,                  /*tp_iternext*/
    495     MD5_methods,        /* tp_methods */
    496     NULL,               /* tp_members */
    497     MD5_getseters,      /* tp_getset */
    498 };
    499 
    500 
/* The single module-level function: md5() */
    502 
    503 /*[clinic input]
    504 _md5.md5
    505 
    506     string: object(c_default="NULL") = b''
    507 
    508 Return a new MD5 hash object; optionally initialized with a string.
    509 [clinic start generated code]*/
    510 
    511 static PyObject *
    512 _md5_md5_impl(PyObject *module, PyObject *string)
    513 /*[clinic end generated code: output=2cfd0f8c091b97e6 input=d12ef8f72d684f7b]*/
    514 {
    515     MD5object *new;
    516     Py_buffer buf;
    517 
    518     if (string)
    519         GET_BUFFER_VIEW_OR_ERROUT(string, &buf);
    520 
    521     if ((new = newMD5object()) == NULL) {
    522         if (string)
    523             PyBuffer_Release(&buf);
    524         return NULL;
    525     }
    526 
    527     md5_init(&new->hash_state);
    528 
    529     if (PyErr_Occurred()) {
    530         Py_DECREF(new);
    531         if (string)
    532             PyBuffer_Release(&buf);
    533         return NULL;
    534     }
    535     if (string) {
    536         md5_process(&new->hash_state, buf.buf, buf.len);
    537         PyBuffer_Release(&buf);
    538     }
    539 
    540     return (PyObject *)new;
    541 }
    542 
    543 
    544 /* List of functions exported by this module */
    545 
    546 static struct PyMethodDef MD5_functions[] = {
    547     _MD5_MD5_METHODDEF
    548     {NULL,      NULL}            /* Sentinel */
    549 };
    550 
    551 
    552 /* Initialize this module. */
    553 
/* Add the integer constant v under the name n to the module object `m`,
   which must be in scope where the macro is expanded.  The result of
   PyModule_AddIntConstant is discarded.  The macro is not referenced
   anywhere else in this file as shown. */
#define insint(n,v) { PyModule_AddIntConstant(m,n,v); }
    555 
    556 
    557 static struct PyModuleDef _md5module = {
    558         PyModuleDef_HEAD_INIT,
    559         "_md5",
    560         NULL,
    561         -1,
    562         MD5_functions,
    563         NULL,
    564         NULL,
    565         NULL,
    566         NULL
    567 };
    568 
    569 PyMODINIT_FUNC
    570 PyInit__md5(void)
    571 {
    572     PyObject *m;
    573 
    574     Py_TYPE(&MD5type) = &PyType_Type;
    575     if (PyType_Ready(&MD5type) < 0)
    576         return NULL;
    577 
    578     m = PyModule_Create(&_md5module);
    579     if (m == NULL)
    580         return NULL;
    581 
    582     Py_INCREF((PyObject *)&MD5type);
    583     PyModule_AddObject(m, "MD5Type", (PyObject *)&MD5type);
    584     return m;
    585 }
    586