1 /* Module that wraps all OpenSSL hash algorithms */ 2 3 /* 4 * Copyright (C) 2005-2010 Gregory P. Smith (greg (at) krypto.org) 5 * Licensed to PSF under a Contributor Agreement. 6 * 7 * Derived from a skeleton of shamodule.c containing work performed by: 8 * 9 * Andrew Kuchling (amk (at) amk.ca) 10 * Greg Stein (gstein (at) lyra.org) 11 * 12 */ 13 14 #define PY_SSIZE_T_CLEAN 15 16 #include "Python.h" 17 #include "structmember.h" 18 19 #ifdef WITH_THREAD 20 #include "pythread.h" 21 #define ENTER_HASHLIB(obj) \ 22 if ((obj)->lock) { \ 23 if (!PyThread_acquire_lock((obj)->lock, 0)) { \ 24 Py_BEGIN_ALLOW_THREADS \ 25 PyThread_acquire_lock((obj)->lock, 1); \ 26 Py_END_ALLOW_THREADS \ 27 } \ 28 } 29 #define LEAVE_HASHLIB(obj) \ 30 if ((obj)->lock) { \ 31 PyThread_release_lock((obj)->lock); \ 32 } 33 #else 34 #define ENTER_HASHLIB(obj) 35 #define LEAVE_HASHLIB(obj) 36 #endif 37 38 /* EVP is the preferred interface to hashing in OpenSSL */ 39 #include <openssl/evp.h> 40 41 #define MUNCH_SIZE INT_MAX 42 43 /* TODO(gps): We should probably make this a module or EVPobject attribute 44 * to allow the user to optimize based on the platform they're using. */ 45 #define HASHLIB_GIL_MINSIZE 2048 46 47 #ifndef HASH_OBJ_CONSTRUCTOR 48 #define HASH_OBJ_CONSTRUCTOR 0 49 #endif 50 51 /* Minimum OpenSSL version needed to support sha224 and higher. */ 52 #if defined(OPENSSL_VERSION_NUMBER) && (OPENSSL_VERSION_NUMBER >= 0x00908000) 53 #define _OPENSSL_SUPPORTS_SHA2 54 #endif 55 56 typedef struct { 57 PyObject_HEAD 58 PyObject *name; /* name of this hash algorithm */ 59 EVP_MD_CTX ctx; /* OpenSSL message digest context */ 60 #ifdef WITH_THREAD 61 PyThread_type_lock lock; /* OpenSSL context lock */ 62 #endif 63 } EVPobject; 64 65 66 static PyTypeObject EVPtype; 67 68 69 #define DEFINE_CONSTS_FOR_NEW(Name) \ 70 static PyObject *CONST_ ## Name ## _name_obj; \ 71 static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \ 72 static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL; 73 74 DEFINE_CONSTS_FOR_NEW(md5) 75 DEFINE_CONSTS_FOR_NEW(sha1) 76 #ifdef _OPENSSL_SUPPORTS_SHA2 77 DEFINE_CONSTS_FOR_NEW(sha224) 78 DEFINE_CONSTS_FOR_NEW(sha256) 79 DEFINE_CONSTS_FOR_NEW(sha384) 80 DEFINE_CONSTS_FOR_NEW(sha512) 81 #endif 82 83 84 static EVPobject * 85 newEVPobject(PyObject *name) 86 { 87 EVPobject *retval = (EVPobject *)PyObject_New(EVPobject, &EVPtype); 88 89 /* save the name for .name to return */ 90 if (retval != NULL) { 91 Py_INCREF(name); 92 retval->name = name; 93 #ifdef WITH_THREAD 94 retval->lock = NULL; 95 #endif 96 } 97 98 return retval; 99 } 100 101 static void 102 EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len) 103 { 104 unsigned int process; 105 const unsigned char *cp = (const unsigned char *)vp; 106 while (0 < len) 107 { 108 if (len > (Py_ssize_t)MUNCH_SIZE) 109 process = MUNCH_SIZE; 110 else 111 process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int); 112 EVP_DigestUpdate(&self->ctx, (const void*)cp, process); 113 len -= process; 114 cp += process; 115 } 116 } 117 118 /* Internal methods for a hash object */ 119 120 static void 121 EVP_dealloc(EVPobject *self) 122 { 123 #ifdef WITH_THREAD 124 if (self->lock != NULL) 125 PyThread_free_lock(self->lock); 126 #endif 127 EVP_MD_CTX_cleanup(&self->ctx); 128 Py_XDECREF(self->name); 129 PyObject_Del(self); 130 } 131 132 static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self) 133 { 134 ENTER_HASHLIB(self); 135 EVP_MD_CTX_copy(new_ctx_p, &self->ctx); 136 LEAVE_HASHLIB(self); 137 } 138 139 /* External methods for a hash object */ 140 141 PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object."); 142 143 144 static PyObject * 145 EVP_copy(EVPobject *self, PyObject *unused) 146 { 147 EVPobject *newobj; 148 149 if ( (newobj = newEVPobject(self->name))==NULL) 150 return NULL; 151 152 locked_EVP_MD_CTX_copy(&newobj->ctx, self); 153 return (PyObject *)newobj; 154 } 155 156 PyDoc_STRVAR(EVP_digest__doc__, 157 "Return the digest value as a string of binary data."); 158 159 static PyObject * 160 EVP_digest(EVPobject *self, PyObject *unused) 161 { 162 unsigned char digest[EVP_MAX_MD_SIZE]; 163 EVP_MD_CTX temp_ctx; 164 PyObject *retval; 165 unsigned int digest_size; 166 167 locked_EVP_MD_CTX_copy(&temp_ctx, self); 168 digest_size = EVP_MD_CTX_size(&temp_ctx); 169 EVP_DigestFinal(&temp_ctx, digest, NULL); 170 171 retval = PyString_FromStringAndSize((const char *)digest, digest_size); 172 EVP_MD_CTX_cleanup(&temp_ctx); 173 return retval; 174 } 175 176 PyDoc_STRVAR(EVP_hexdigest__doc__, 177 "Return the digest value as a string of hexadecimal digits."); 178 179 static PyObject * 180 EVP_hexdigest(EVPobject *self, PyObject *unused) 181 { 182 unsigned char digest[EVP_MAX_MD_SIZE]; 183 EVP_MD_CTX temp_ctx; 184 PyObject *retval; 185 char *hex_digest; 186 unsigned int i, j, digest_size; 187 188 /* Get the raw (binary) digest value */ 189 locked_EVP_MD_CTX_copy(&temp_ctx, self); 190 digest_size = EVP_MD_CTX_size(&temp_ctx); 191 EVP_DigestFinal(&temp_ctx, digest, NULL); 192 193 EVP_MD_CTX_cleanup(&temp_ctx); 194 195 /* Create a new string */ 196 /* NOTE: not thread safe! modifying an already created string object */ 197 /* (not a problem because we hold the GIL by default) */ 198 retval = PyString_FromStringAndSize(NULL, digest_size * 2); 199 if (!retval) 200 return NULL; 201 hex_digest = PyString_AsString(retval); 202 if (!hex_digest) { 203 Py_DECREF(retval); 204 return NULL; 205 } 206 207 /* Make hex version of the digest */ 208 for(i=j=0; i<digest_size; i++) { 209 char c; 210 c = (digest[i] >> 4) & 0xf; 211 c = (c>9) ? c+'a'-10 : c + '0'; 212 hex_digest[j++] = c; 213 c = (digest[i] & 0xf); 214 c = (c>9) ? c+'a'-10 : c + '0'; 215 hex_digest[j++] = c; 216 } 217 return retval; 218 } 219 220 PyDoc_STRVAR(EVP_update__doc__, 221 "Update this hash object's state with the provided string."); 222 223 static PyObject * 224 EVP_update(EVPobject *self, PyObject *args) 225 { 226 Py_buffer view; 227 228 if (!PyArg_ParseTuple(args, "s*:update", &view)) 229 return NULL; 230 231 #ifdef WITH_THREAD 232 if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) { 233 self->lock = PyThread_allocate_lock(); 234 /* fail? lock = NULL and we fail over to non-threaded code. */ 235 } 236 237 if (self->lock != NULL) { 238 Py_BEGIN_ALLOW_THREADS 239 PyThread_acquire_lock(self->lock, 1); 240 EVP_hash(self, view.buf, view.len); 241 PyThread_release_lock(self->lock); 242 Py_END_ALLOW_THREADS 243 } 244 else 245 #endif 246 { 247 EVP_hash(self, view.buf, view.len); 248 } 249 250 PyBuffer_Release(&view); 251 252 Py_RETURN_NONE; 253 } 254 255 static PyMethodDef EVP_methods[] = { 256 {"update", (PyCFunction)EVP_update, METH_VARARGS, EVP_update__doc__}, 257 {"digest", (PyCFunction)EVP_digest, METH_NOARGS, EVP_digest__doc__}, 258 {"hexdigest", (PyCFunction)EVP_hexdigest, METH_NOARGS, EVP_hexdigest__doc__}, 259 {"copy", (PyCFunction)EVP_copy, METH_NOARGS, EVP_copy__doc__}, 260 {NULL, NULL} /* sentinel */ 261 }; 262 263 static PyObject * 264 EVP_get_block_size(EVPobject *self, void *closure) 265 { 266 long block_size; 267 block_size = EVP_MD_CTX_block_size(&self->ctx); 268 return PyLong_FromLong(block_size); 269 } 270 271 static PyObject * 272 EVP_get_digest_size(EVPobject *self, void *closure) 273 { 274 long size; 275 size = EVP_MD_CTX_size(&self->ctx); 276 return PyLong_FromLong(size); 277 } 278 279 static PyMemberDef EVP_members[] = { 280 {"name", T_OBJECT, offsetof(EVPobject, name), READONLY, PyDoc_STR("algorithm name.")}, 281 {NULL} /* Sentinel */ 282 }; 283 284 static PyGetSetDef EVP_getseters[] = { 285 {"digest_size", 286 (getter)EVP_get_digest_size, NULL, 287 NULL, 288 NULL}, 289 {"block_size", 290 (getter)EVP_get_block_size, NULL, 291 NULL, 292 NULL}, 293 /* the old md5 and sha modules support 'digest_size' as in PEP 247. 294 * the old sha module also supported 'digestsize'. ugh. */ 295 {"digestsize", 296 (getter)EVP_get_digest_size, NULL, 297 NULL, 298 NULL}, 299 {NULL} /* Sentinel */ 300 }; 301 302 303 static PyObject * 304 EVP_repr(PyObject *self) 305 { 306 char buf[100]; 307 PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>", 308 PyString_AsString(((EVPobject *)self)->name), self); 309 return PyString_FromString(buf); 310 } 311 312 #if HASH_OBJ_CONSTRUCTOR 313 static int 314 EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds) 315 { 316 static char *kwlist[] = {"name", "string", NULL}; 317 PyObject *name_obj = NULL; 318 Py_buffer view = { 0 }; 319 char *nameStr; 320 const EVP_MD *digest; 321 322 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s*:HASH", kwlist, 323 &name_obj, &view)) { 324 return -1; 325 } 326 327 if (!PyArg_Parse(name_obj, "s", &nameStr)) { 328 PyErr_SetString(PyExc_TypeError, "name must be a string"); 329 PyBuffer_Release(&view); 330 return -1; 331 } 332 333 digest = EVP_get_digestbyname(nameStr); 334 if (!digest) { 335 PyErr_SetString(PyExc_ValueError, "unknown hash function"); 336 PyBuffer_Release(&view); 337 return -1; 338 } 339 EVP_DigestInit(&self->ctx, digest); 340 341 self->name = name_obj; 342 Py_INCREF(self->name); 343 344 if (view.obj) { 345 if (view.len >= HASHLIB_GIL_MINSIZE) { 346 Py_BEGIN_ALLOW_THREADS 347 EVP_hash(self, view.buf, view.len); 348 Py_END_ALLOW_THREADS 349 } else { 350 EVP_hash(self, view.buf, view.len); 351 } 352 PyBuffer_Release(&view); 353 } 354 355 return 0; 356 } 357 #endif 358 359 360 PyDoc_STRVAR(hashtype_doc, 361 "A hash represents the object used to calculate a checksum of a\n\ 362 string of information.\n\ 363 \n\ 364 Methods:\n\ 365 \n\ 366 update() -- updates the current digest with an additional string\n\ 367 digest() -- return the current digest value\n\ 368 hexdigest() -- return the current digest as a string of hexadecimal digits\n\ 369 copy() -- return a copy of the current hash object\n\ 370 \n\ 371 Attributes:\n\ 372 \n\ 373 name -- the hash algorithm being used by this object\n\ 374 digest_size -- number of bytes in this hashes output\n"); 375 376 static PyTypeObject EVPtype = { 377 PyVarObject_HEAD_INIT(NULL, 0) 378 "_hashlib.HASH", /*tp_name*/ 379 sizeof(EVPobject), /*tp_basicsize*/ 380 0, /*tp_itemsize*/ 381 /* methods */ 382 (destructor)EVP_dealloc, /*tp_dealloc*/ 383 0, /*tp_print*/ 384 0, /*tp_getattr*/ 385 0, /*tp_setattr*/ 386 0, /*tp_compare*/ 387 EVP_repr, /*tp_repr*/ 388 0, /*tp_as_number*/ 389 0, /*tp_as_sequence*/ 390 0, /*tp_as_mapping*/ 391 0, /*tp_hash*/ 392 0, /*tp_call*/ 393 0, /*tp_str*/ 394 0, /*tp_getattro*/ 395 0, /*tp_setattro*/ 396 0, /*tp_as_buffer*/ 397 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ 398 hashtype_doc, /*tp_doc*/ 399 0, /*tp_traverse*/ 400 0, /*tp_clear*/ 401 0, /*tp_richcompare*/ 402 0, /*tp_weaklistoffset*/ 403 0, /*tp_iter*/ 404 0, /*tp_iternext*/ 405 EVP_methods, /* tp_methods */ 406 EVP_members, /* tp_members */ 407 EVP_getseters, /* tp_getset */ 408 #if 1 409 0, /* tp_base */ 410 0, /* tp_dict */ 411 0, /* tp_descr_get */ 412 0, /* tp_descr_set */ 413 0, /* tp_dictoffset */ 414 #endif 415 #if HASH_OBJ_CONSTRUCTOR 416 (initproc)EVP_tp_init, /* tp_init */ 417 #endif 418 }; 419 420 static PyObject * 421 EVPnew(PyObject *name_obj, 422 const EVP_MD *digest, const EVP_MD_CTX *initial_ctx, 423 const unsigned char *cp, Py_ssize_t len) 424 { 425 EVPobject *self; 426 427 if (!digest && !initial_ctx) { 428 PyErr_SetString(PyExc_ValueError, "unsupported hash type"); 429 return NULL; 430 } 431 432 if ((self = newEVPobject(name_obj)) == NULL) 433 return NULL; 434 435 if (initial_ctx) { 436 EVP_MD_CTX_copy(&self->ctx, initial_ctx); 437 } else { 438 EVP_DigestInit(&self->ctx, digest); 439 } 440 441 if (cp && len) { 442 if (len >= HASHLIB_GIL_MINSIZE) { 443 Py_BEGIN_ALLOW_THREADS 444 EVP_hash(self, cp, len); 445 Py_END_ALLOW_THREADS 446 } else { 447 EVP_hash(self, cp, len); 448 } 449 } 450 451 return (PyObject *)self; 452 } 453 454 455 /* The module-level function: new() */ 456 457 PyDoc_STRVAR(EVP_new__doc__, 458 "Return a new hash object using the named algorithm.\n\ 459 An optional string argument may be provided and will be\n\ 460 automatically hashed.\n\ 461 \n\ 462 The MD5 and SHA1 algorithms are always supported.\n"); 463 464 static PyObject * 465 EVP_new(PyObject *self, PyObject *args, PyObject *kwdict) 466 { 467 static char *kwlist[] = {"name", "string", NULL}; 468 PyObject *name_obj = NULL; 469 Py_buffer view = { 0 }; 470 PyObject *ret_obj; 471 char *name; 472 const EVP_MD *digest; 473 474 if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|s*:new", kwlist, 475 &name_obj, &view)) { 476 return NULL; 477 } 478 479 if (!PyArg_Parse(name_obj, "s", &name)) { 480 PyErr_SetString(PyExc_TypeError, "name must be a string"); 481 return NULL; 482 } 483 484 digest = EVP_get_digestbyname(name); 485 486 ret_obj = EVPnew(name_obj, digest, NULL, (unsigned char*)view.buf, 487 view.len); 488 PyBuffer_Release(&view); 489 490 return ret_obj; 491 } 492 493 /* 494 * This macro generates constructor function definitions for specific 495 * hash algorithms. These constructors are much faster than calling 496 * the generic one passing it a python string and are noticably 497 * faster than calling a python new() wrapper. Thats important for 498 * code that wants to make hashes of a bunch of small strings. 499 */ 500 #define GEN_CONSTRUCTOR(NAME) \ 501 static PyObject * \ 502 EVP_new_ ## NAME (PyObject *self, PyObject *args) \ 503 { \ 504 Py_buffer view = { 0 }; \ 505 PyObject *ret_obj; \ 506 \ 507 if (!PyArg_ParseTuple(args, "|s*:" #NAME , &view)) { \ 508 return NULL; \ 509 } \ 510 \ 511 ret_obj = EVPnew( \ 512 CONST_ ## NAME ## _name_obj, \ 513 NULL, \ 514 CONST_new_ ## NAME ## _ctx_p, \ 515 (unsigned char*)view.buf, view.len); \ 516 PyBuffer_Release(&view); \ 517 return ret_obj; \ 518 } 519 520 /* a PyMethodDef structure for the constructor */ 521 #define CONSTRUCTOR_METH_DEF(NAME) \ 522 {"openssl_" #NAME, (PyCFunction)EVP_new_ ## NAME, METH_VARARGS, \ 523 PyDoc_STR("Returns a " #NAME \ 524 " hash object; optionally initialized with a string") \ 525 } 526 527 /* used in the init function to setup a constructor */ 528 #define INIT_CONSTRUCTOR_CONSTANTS(NAME) do { \ 529 CONST_ ## NAME ## _name_obj = PyString_FromString(#NAME); \ 530 if (EVP_get_digestbyname(#NAME)) { \ 531 CONST_new_ ## NAME ## _ctx_p = &CONST_new_ ## NAME ## _ctx; \ 532 EVP_DigestInit(CONST_new_ ## NAME ## _ctx_p, EVP_get_digestbyname(#NAME)); \ 533 } \ 534 } while (0); 535 536 GEN_CONSTRUCTOR(md5) 537 GEN_CONSTRUCTOR(sha1) 538 #ifdef _OPENSSL_SUPPORTS_SHA2 539 GEN_CONSTRUCTOR(sha224) 540 GEN_CONSTRUCTOR(sha256) 541 GEN_CONSTRUCTOR(sha384) 542 GEN_CONSTRUCTOR(sha512) 543 #endif 544 545 /* List of functions exported by this module */ 546 547 static struct PyMethodDef EVP_functions[] = { 548 {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__}, 549 CONSTRUCTOR_METH_DEF(md5), 550 CONSTRUCTOR_METH_DEF(sha1), 551 #ifdef _OPENSSL_SUPPORTS_SHA2 552 CONSTRUCTOR_METH_DEF(sha224), 553 CONSTRUCTOR_METH_DEF(sha256), 554 CONSTRUCTOR_METH_DEF(sha384), 555 CONSTRUCTOR_METH_DEF(sha512), 556 #endif 557 {NULL, NULL} /* Sentinel */ 558 }; 559 560 561 /* Initialize this module. */ 562 563 PyMODINIT_FUNC 564 init_hashlib(void) 565 { 566 PyObject *m; 567 568 OpenSSL_add_all_digests(); 569 570 /* TODO build EVP_functions openssl_* entries dynamically based 571 * on what hashes are supported rather than listing many 572 * but having some be unsupported. Only init appropriate 573 * constants. */ 574 575 Py_TYPE(&EVPtype) = &PyType_Type; 576 if (PyType_Ready(&EVPtype) < 0) 577 return; 578 579 m = Py_InitModule("_hashlib", EVP_functions); 580 if (m == NULL) 581 return; 582 583 #if HASH_OBJ_CONSTRUCTOR 584 Py_INCREF(&EVPtype); 585 PyModule_AddObject(m, "HASH", (PyObject *)&EVPtype); 586 #endif 587 588 /* these constants are used by the convenience constructors */ 589 INIT_CONSTRUCTOR_CONSTANTS(md5); 590 INIT_CONSTRUCTOR_CONSTANTS(sha1); 591 #ifdef _OPENSSL_SUPPORTS_SHA2 592 INIT_CONSTRUCTOR_CONSTANTS(sha224); 593 INIT_CONSTRUCTOR_CONSTANTS(sha256); 594 INIT_CONSTRUCTOR_CONSTANTS(sha384); 595 INIT_CONSTRUCTOR_CONSTANTS(sha512); 596 #endif 597 } 598