Home | History | Annotate | Download | only in Modules
      1 /* Module that wraps all OpenSSL hash algorithms */
      2 
      3 /*
      4  * Copyright (C) 2005-2010   Gregory P. Smith (greg (at) krypto.org)
      5  * Licensed to PSF under a Contributor Agreement.
      6  *
      7  * Derived from a skeleton of shamodule.c containing work performed by:
      8  *
      9  * Andrew Kuchling (amk (at) amk.ca)
     10  * Greg Stein (gstein (at) lyra.org)
     11  *
     12  */
     13 
     14 #define PY_SSIZE_T_CLEAN
     15 
     16 #include "Python.h"
     17 #include "structmember.h"
     18 
     19 #ifdef WITH_THREAD
     20 #include "pythread.h"
     21     #define ENTER_HASHLIB(obj) \
     22         if ((obj)->lock) { \
     23             if (!PyThread_acquire_lock((obj)->lock, 0)) { \
     24                 Py_BEGIN_ALLOW_THREADS \
     25                 PyThread_acquire_lock((obj)->lock, 1); \
     26                 Py_END_ALLOW_THREADS \
     27             } \
     28         }
     29     #define LEAVE_HASHLIB(obj) \
     30         if ((obj)->lock) { \
     31             PyThread_release_lock((obj)->lock); \
     32         }
     33 #else
     34     #define ENTER_HASHLIB(obj)
     35     #define LEAVE_HASHLIB(obj)
     36 #endif
     37 
     38 /* EVP is the preferred interface to hashing in OpenSSL */
     39 #include <openssl/evp.h>
     40 
     41 #define MUNCH_SIZE INT_MAX
     42 
     43 /* TODO(gps): We should probably make this a module or EVPobject attribute
     44  * to allow the user to optimize based on the platform they're using. */
     45 #define HASHLIB_GIL_MINSIZE 2048
     46 
     47 #ifndef HASH_OBJ_CONSTRUCTOR
     48 #define HASH_OBJ_CONSTRUCTOR 0
     49 #endif
     50 
     51 /* Minimum OpenSSL version needed to support sha224 and higher. */
     52 #if defined(OPENSSL_VERSION_NUMBER) && (OPENSSL_VERSION_NUMBER >= 0x00908000)
     53 #define _OPENSSL_SUPPORTS_SHA2
     54 #endif
     55 
     56 typedef struct {
     57     PyObject_HEAD
     58     PyObject            *name;  /* name of this hash algorithm */
     59     EVP_MD_CTX          ctx;    /* OpenSSL message digest context */
     60 #ifdef WITH_THREAD
     61     PyThread_type_lock  lock;   /* OpenSSL context lock */
     62 #endif
     63 } EVPobject;
     64 
     65 
     66 static PyTypeObject EVPtype;
     67 
     68 
     69 #define DEFINE_CONSTS_FOR_NEW(Name)  \
     70     static PyObject *CONST_ ## Name ## _name_obj; \
     71     static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \
     72     static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL;
     73 
     74 DEFINE_CONSTS_FOR_NEW(md5)
     75 DEFINE_CONSTS_FOR_NEW(sha1)
     76 #ifdef _OPENSSL_SUPPORTS_SHA2
     77 DEFINE_CONSTS_FOR_NEW(sha224)
     78 DEFINE_CONSTS_FOR_NEW(sha256)
     79 DEFINE_CONSTS_FOR_NEW(sha384)
     80 DEFINE_CONSTS_FOR_NEW(sha512)
     81 #endif
     82 
     83 
     84 static EVPobject *
     85 newEVPobject(PyObject *name)
     86 {
     87     EVPobject *retval = (EVPobject *)PyObject_New(EVPobject, &EVPtype);
     88 
     89     /* save the name for .name to return */
     90     if (retval != NULL) {
     91         Py_INCREF(name);
     92         retval->name = name;
     93 #ifdef WITH_THREAD
     94         retval->lock = NULL;
     95 #endif
     96     }
     97 
     98     return retval;
     99 }
    100 
    101 static void
    102 EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len)
    103 {
    104     unsigned int process;
    105     const unsigned char *cp = (const unsigned char *)vp;
    106     while (0 < len)
    107     {
    108         if (len > (Py_ssize_t)MUNCH_SIZE)
    109             process = MUNCH_SIZE;
    110         else
    111             process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int);
    112         EVP_DigestUpdate(&self->ctx, (const void*)cp, process);
    113         len -= process;
    114         cp += process;
    115     }
    116 }
    117 
    118 /* Internal methods for a hash object */
    119 
    120 static void
    121 EVP_dealloc(EVPobject *self)
    122 {
    123 #ifdef WITH_THREAD
    124     if (self->lock != NULL)
    125         PyThread_free_lock(self->lock);
    126 #endif
    127     EVP_MD_CTX_cleanup(&self->ctx);
    128     Py_XDECREF(self->name);
    129     PyObject_Del(self);
    130 }
    131 
    132 static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
    133 {
    134     ENTER_HASHLIB(self);
    135     EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
    136     LEAVE_HASHLIB(self);
    137 }
    138 
    139 /* External methods for a hash object */
    140 
    141 PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
    142 
    143 
    144 static PyObject *
    145 EVP_copy(EVPobject *self, PyObject *unused)
    146 {
    147     EVPobject *newobj;
    148 
    149     if ( (newobj = newEVPobject(self->name))==NULL)
    150         return NULL;
    151 
    152     locked_EVP_MD_CTX_copy(&newobj->ctx, self);
    153     return (PyObject *)newobj;
    154 }
    155 
    156 PyDoc_STRVAR(EVP_digest__doc__,
    157 "Return the digest value as a string of binary data.");
    158 
    159 static PyObject *
    160 EVP_digest(EVPobject *self, PyObject *unused)
    161 {
    162     unsigned char digest[EVP_MAX_MD_SIZE];
    163     EVP_MD_CTX temp_ctx;
    164     PyObject *retval;
    165     unsigned int digest_size;
    166 
    167     locked_EVP_MD_CTX_copy(&temp_ctx, self);
    168     digest_size = EVP_MD_CTX_size(&temp_ctx);
    169     EVP_DigestFinal(&temp_ctx, digest, NULL);
    170 
    171     retval = PyString_FromStringAndSize((const char *)digest, digest_size);
    172     EVP_MD_CTX_cleanup(&temp_ctx);
    173     return retval;
    174 }
    175 
    176 PyDoc_STRVAR(EVP_hexdigest__doc__,
    177 "Return the digest value as a string of hexadecimal digits.");
    178 
    179 static PyObject *
    180 EVP_hexdigest(EVPobject *self, PyObject *unused)
    181 {
    182     unsigned char digest[EVP_MAX_MD_SIZE];
    183     EVP_MD_CTX temp_ctx;
    184     PyObject *retval;
    185     char *hex_digest;
    186     unsigned int i, j, digest_size;
    187 
    188     /* Get the raw (binary) digest value */
    189     locked_EVP_MD_CTX_copy(&temp_ctx, self);
    190     digest_size = EVP_MD_CTX_size(&temp_ctx);
    191     EVP_DigestFinal(&temp_ctx, digest, NULL);
    192 
    193     EVP_MD_CTX_cleanup(&temp_ctx);
    194 
    195     /* Create a new string */
    196     /* NOTE: not thread safe! modifying an already created string object */
    197     /* (not a problem because we hold the GIL by default) */
    198     retval = PyString_FromStringAndSize(NULL, digest_size * 2);
    199     if (!retval)
    200             return NULL;
    201     hex_digest = PyString_AsString(retval);
    202     if (!hex_digest) {
    203             Py_DECREF(retval);
    204             return NULL;
    205     }
    206 
    207     /* Make hex version of the digest */
    208     for(i=j=0; i<digest_size; i++) {
    209         char c;
    210         c = (digest[i] >> 4) & 0xf;
    211         c = (c>9) ? c+'a'-10 : c + '0';
    212         hex_digest[j++] = c;
    213         c = (digest[i] & 0xf);
    214         c = (c>9) ? c+'a'-10 : c + '0';
    215         hex_digest[j++] = c;
    216     }
    217     return retval;
    218 }
    219 
    220 PyDoc_STRVAR(EVP_update__doc__,
    221 "Update this hash object's state with the provided string.");
    222 
    223 static PyObject *
    224 EVP_update(EVPobject *self, PyObject *args)
    225 {
    226     Py_buffer view;
    227 
    228     if (!PyArg_ParseTuple(args, "s*:update", &view))
    229         return NULL;
    230 
    231 #ifdef WITH_THREAD
    232     if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) {
    233         self->lock = PyThread_allocate_lock();
    234         /* fail? lock = NULL and we fail over to non-threaded code. */
    235     }
    236 
    237     if (self->lock != NULL) {
    238         Py_BEGIN_ALLOW_THREADS
    239         PyThread_acquire_lock(self->lock, 1);
    240         EVP_hash(self, view.buf, view.len);
    241         PyThread_release_lock(self->lock);
    242         Py_END_ALLOW_THREADS
    243     }
    244     else
    245 #endif
    246     {
    247         EVP_hash(self, view.buf, view.len);
    248     }
    249 
    250     PyBuffer_Release(&view);
    251 
    252     Py_RETURN_NONE;
    253 }
    254 
    255 static PyMethodDef EVP_methods[] = {
    256     {"update",    (PyCFunction)EVP_update,    METH_VARARGS, EVP_update__doc__},
    257     {"digest",    (PyCFunction)EVP_digest,    METH_NOARGS,  EVP_digest__doc__},
    258     {"hexdigest", (PyCFunction)EVP_hexdigest, METH_NOARGS,  EVP_hexdigest__doc__},
    259     {"copy",      (PyCFunction)EVP_copy,      METH_NOARGS,  EVP_copy__doc__},
    260     {NULL,        NULL}         /* sentinel */
    261 };
    262 
    263 static PyObject *
    264 EVP_get_block_size(EVPobject *self, void *closure)
    265 {
    266     long block_size;
    267     block_size = EVP_MD_CTX_block_size(&self->ctx);
    268     return PyLong_FromLong(block_size);
    269 }
    270 
    271 static PyObject *
    272 EVP_get_digest_size(EVPobject *self, void *closure)
    273 {
    274     long size;
    275     size = EVP_MD_CTX_size(&self->ctx);
    276     return PyLong_FromLong(size);
    277 }
    278 
    279 static PyMemberDef EVP_members[] = {
    280     {"name", T_OBJECT, offsetof(EVPobject, name), READONLY, PyDoc_STR("algorithm name.")},
    281     {NULL}  /* Sentinel */
    282 };
    283 
    284 static PyGetSetDef EVP_getseters[] = {
    285     {"digest_size",
    286      (getter)EVP_get_digest_size, NULL,
    287      NULL,
    288      NULL},
    289     {"block_size",
    290      (getter)EVP_get_block_size, NULL,
    291      NULL,
    292      NULL},
    293     /* the old md5 and sha modules support 'digest_size' as in PEP 247.
    294      * the old sha module also supported 'digestsize'.  ugh. */
    295     {"digestsize",
    296      (getter)EVP_get_digest_size, NULL,
    297      NULL,
    298      NULL},
    299     {NULL}  /* Sentinel */
    300 };
    301 
    302 
    303 static PyObject *
    304 EVP_repr(PyObject *self)
    305 {
    306     char buf[100];
    307     PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>",
    308             PyString_AsString(((EVPobject *)self)->name), self);
    309     return PyString_FromString(buf);
    310 }
    311 
    312 #if HASH_OBJ_CONSTRUCTOR
    313 static int
    314 EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
    315 {
    316     static char *kwlist[] = {"name", "string", NULL};
    317     PyObject *name_obj = NULL;
    318     Py_buffer view = { 0 };
    319     char *nameStr;
    320     const EVP_MD *digest;
    321 
    322     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s*:HASH", kwlist,
    323                                      &name_obj, &view)) {
    324         return -1;
    325     }
    326 
    327     if (!PyArg_Parse(name_obj, "s", &nameStr)) {
    328         PyErr_SetString(PyExc_TypeError, "name must be a string");
    329         PyBuffer_Release(&view);
    330         return -1;
    331     }
    332 
    333     digest = EVP_get_digestbyname(nameStr);
    334     if (!digest) {
    335         PyErr_SetString(PyExc_ValueError, "unknown hash function");
    336         PyBuffer_Release(&view);
    337         return -1;
    338     }
    339     EVP_DigestInit(&self->ctx, digest);
    340 
    341     self->name = name_obj;
    342     Py_INCREF(self->name);
    343 
    344     if (view.obj) {
    345         if (view.len >= HASHLIB_GIL_MINSIZE) {
    346             Py_BEGIN_ALLOW_THREADS
    347             EVP_hash(self, view.buf, view.len);
    348             Py_END_ALLOW_THREADS
    349         } else {
    350             EVP_hash(self, view.buf, view.len);
    351         }
    352         PyBuffer_Release(&view);
    353     }
    354 
    355     return 0;
    356 }
    357 #endif
    358 
    359 
    360 PyDoc_STRVAR(hashtype_doc,
    361 "A hash represents the object used to calculate a checksum of a\n\
    362 string of information.\n\
    363 \n\
    364 Methods:\n\
    365 \n\
    366 update() -- updates the current digest with an additional string\n\
    367 digest() -- return the current digest value\n\
    368 hexdigest() -- return the current digest as a string of hexadecimal digits\n\
    369 copy() -- return a copy of the current hash object\n\
    370 \n\
    371 Attributes:\n\
    372 \n\
    373 name -- the hash algorithm being used by this object\n\
    374 digest_size -- number of bytes in this hashes output\n");
    375 
    376 static PyTypeObject EVPtype = {
    377     PyVarObject_HEAD_INIT(NULL, 0)
    378     "_hashlib.HASH",    /*tp_name*/
    379     sizeof(EVPobject),  /*tp_basicsize*/
    380     0,                  /*tp_itemsize*/
    381     /* methods */
    382     (destructor)EVP_dealloc,    /*tp_dealloc*/
    383     0,                  /*tp_print*/
    384     0,                  /*tp_getattr*/
    385     0,                  /*tp_setattr*/
    386     0,                  /*tp_compare*/
    387     EVP_repr,           /*tp_repr*/
    388     0,                  /*tp_as_number*/
    389     0,                  /*tp_as_sequence*/
    390     0,                  /*tp_as_mapping*/
    391     0,                  /*tp_hash*/
    392     0,                  /*tp_call*/
    393     0,                  /*tp_str*/
    394     0,                  /*tp_getattro*/
    395     0,                  /*tp_setattro*/
    396     0,                  /*tp_as_buffer*/
    397     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
    398     hashtype_doc,       /*tp_doc*/
    399     0,                  /*tp_traverse*/
    400     0,                  /*tp_clear*/
    401     0,                  /*tp_richcompare*/
    402     0,                  /*tp_weaklistoffset*/
    403     0,                  /*tp_iter*/
    404     0,                  /*tp_iternext*/
    405     EVP_methods,        /* tp_methods */
    406     EVP_members,        /* tp_members */
    407     EVP_getseters,      /* tp_getset */
    408 #if 1
    409     0,                  /* tp_base */
    410     0,                  /* tp_dict */
    411     0,                  /* tp_descr_get */
    412     0,                  /* tp_descr_set */
    413     0,                  /* tp_dictoffset */
    414 #endif
    415 #if HASH_OBJ_CONSTRUCTOR
    416     (initproc)EVP_tp_init, /* tp_init */
    417 #endif
    418 };
    419 
    420 static PyObject *
    421 EVPnew(PyObject *name_obj,
    422        const EVP_MD *digest, const EVP_MD_CTX *initial_ctx,
    423        const unsigned char *cp, Py_ssize_t len)
    424 {
    425     EVPobject *self;
    426 
    427     if (!digest && !initial_ctx) {
    428         PyErr_SetString(PyExc_ValueError, "unsupported hash type");
    429         return NULL;
    430     }
    431 
    432     if ((self = newEVPobject(name_obj)) == NULL)
    433         return NULL;
    434 
    435     if (initial_ctx) {
    436         EVP_MD_CTX_copy(&self->ctx, initial_ctx);
    437     } else {
    438         EVP_DigestInit(&self->ctx, digest);
    439     }
    440 
    441     if (cp && len) {
    442         if (len >= HASHLIB_GIL_MINSIZE) {
    443             Py_BEGIN_ALLOW_THREADS
    444             EVP_hash(self, cp, len);
    445             Py_END_ALLOW_THREADS
    446         } else {
    447             EVP_hash(self, cp, len);
    448         }
    449     }
    450 
    451     return (PyObject *)self;
    452 }
    453 
    454 
    455 /* The module-level function: new() */
    456 
    457 PyDoc_STRVAR(EVP_new__doc__,
    458 "Return a new hash object using the named algorithm.\n\
    459 An optional string argument may be provided and will be\n\
    460 automatically hashed.\n\
    461 \n\
    462 The MD5 and SHA1 algorithms are always supported.\n");
    463 
    464 static PyObject *
    465 EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
    466 {
    467     static char *kwlist[] = {"name", "string", NULL};
    468     PyObject *name_obj = NULL;
    469     Py_buffer view = { 0 };
    470     PyObject *ret_obj;
    471     char *name;
    472     const EVP_MD *digest;
    473 
    474     if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|s*:new", kwlist,
    475                                      &name_obj, &view)) {
    476         return NULL;
    477     }
    478 
    479     if (!PyArg_Parse(name_obj, "s", &name)) {
    480         PyErr_SetString(PyExc_TypeError, "name must be a string");
    481         return NULL;
    482     }
    483 
    484     digest = EVP_get_digestbyname(name);
    485 
    486     ret_obj = EVPnew(name_obj, digest, NULL, (unsigned char*)view.buf,
    487                      view.len);
    488     PyBuffer_Release(&view);
    489 
    490     return ret_obj;
    491 }
    492 
    493 /*
    494  *  This macro generates constructor function definitions for specific
    495  *  hash algorithms.  These constructors are much faster than calling
    496  *  the generic one passing it a python string and are noticably
    497  *  faster than calling a python new() wrapper.  Thats important for
    498  *  code that wants to make hashes of a bunch of small strings.
    499  */
    500 #define GEN_CONSTRUCTOR(NAME)  \
    501     static PyObject * \
    502     EVP_new_ ## NAME (PyObject *self, PyObject *args) \
    503     { \
    504         Py_buffer view = { 0 }; \
    505         PyObject *ret_obj; \
    506      \
    507         if (!PyArg_ParseTuple(args, "|s*:" #NAME , &view)) { \
    508             return NULL; \
    509         } \
    510      \
    511         ret_obj = EVPnew( \
    512                     CONST_ ## NAME ## _name_obj, \
    513                     NULL, \
    514                     CONST_new_ ## NAME ## _ctx_p, \
    515                     (unsigned char*)view.buf, view.len); \
    516         PyBuffer_Release(&view); \
    517         return ret_obj; \
    518     }
    519 
    520 /* a PyMethodDef structure for the constructor */
    521 #define CONSTRUCTOR_METH_DEF(NAME)  \
    522     {"openssl_" #NAME, (PyCFunction)EVP_new_ ## NAME, METH_VARARGS, \
    523         PyDoc_STR("Returns a " #NAME \
    524                   " hash object; optionally initialized with a string") \
    525     }
    526 
    527 /* used in the init function to setup a constructor */
    528 #define INIT_CONSTRUCTOR_CONSTANTS(NAME)  do { \
    529     CONST_ ## NAME ## _name_obj = PyString_FromString(#NAME); \
    530     if (EVP_get_digestbyname(#NAME)) { \
    531         CONST_new_ ## NAME ## _ctx_p = &CONST_new_ ## NAME ## _ctx; \
    532         EVP_DigestInit(CONST_new_ ## NAME ## _ctx_p, EVP_get_digestbyname(#NAME)); \
    533     } \
    534 } while (0);
    535 
    536 GEN_CONSTRUCTOR(md5)
    537 GEN_CONSTRUCTOR(sha1)
    538 #ifdef _OPENSSL_SUPPORTS_SHA2
    539 GEN_CONSTRUCTOR(sha224)
    540 GEN_CONSTRUCTOR(sha256)
    541 GEN_CONSTRUCTOR(sha384)
    542 GEN_CONSTRUCTOR(sha512)
    543 #endif
    544 
    545 /* List of functions exported by this module */
    546 
    547 static struct PyMethodDef EVP_functions[] = {
    548     {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__},
    549     CONSTRUCTOR_METH_DEF(md5),
    550     CONSTRUCTOR_METH_DEF(sha1),
    551 #ifdef _OPENSSL_SUPPORTS_SHA2
    552     CONSTRUCTOR_METH_DEF(sha224),
    553     CONSTRUCTOR_METH_DEF(sha256),
    554     CONSTRUCTOR_METH_DEF(sha384),
    555     CONSTRUCTOR_METH_DEF(sha512),
    556 #endif
    557     {NULL,      NULL}            /* Sentinel */
    558 };
    559 
    560 
    561 /* Initialize this module. */
    562 
    563 PyMODINIT_FUNC
    564 init_hashlib(void)
    565 {
    566     PyObject *m;
    567 
    568     OpenSSL_add_all_digests();
    569 
    570     /* TODO build EVP_functions openssl_* entries dynamically based
    571      * on what hashes are supported rather than listing many
    572      * but having some be unsupported.  Only init appropriate
    573      * constants. */
    574 
    575     Py_TYPE(&EVPtype) = &PyType_Type;
    576     if (PyType_Ready(&EVPtype) < 0)
    577         return;
    578 
    579     m = Py_InitModule("_hashlib", EVP_functions);
    580     if (m == NULL)
    581         return;
    582 
    583 #if HASH_OBJ_CONSTRUCTOR
    584     Py_INCREF(&EVPtype);
    585     PyModule_AddObject(m, "HASH", (PyObject *)&EVPtype);
    586 #endif
    587 
    588     /* these constants are used by the convenience constructors */
    589     INIT_CONSTRUCTOR_CONSTANTS(md5);
    590     INIT_CONSTRUCTOR_CONSTANTS(sha1);
    591 #ifdef _OPENSSL_SUPPORTS_SHA2
    592     INIT_CONSTRUCTOR_CONSTANTS(sha224);
    593     INIT_CONSTRUCTOR_CONSTANTS(sha256);
    594     INIT_CONSTRUCTOR_CONSTANTS(sha384);
    595     INIT_CONSTRUCTOR_CONSTANTS(sha512);
    596 #endif
    597 }
    598