1 /* 2 3 python-bz2 - python bz2 library interface 4 5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer (at) conectiva.com> 6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved 7 8 */ 9 10 #include "Python.h" 11 #include <stdio.h> 12 #include <bzlib.h> 13 #include "structmember.h" 14 15 #ifdef WITH_THREAD 16 #include "pythread.h" 17 #endif 18 19 static char __author__[] = 20 "The bz2 python module was written by:\n\ 21 \n\ 22 Gustavo Niemeyer <niemeyer (at) conectiva.com>\n\ 23 "; 24 25 /* Our very own off_t-like type, 64-bit if possible */ 26 /* copied from Objects/fileobject.c */ 27 #if !defined(HAVE_LARGEFILE_SUPPORT) 28 typedef off_t Py_off_t; 29 #elif SIZEOF_OFF_T >= 8 30 typedef off_t Py_off_t; 31 #elif SIZEOF_FPOS_T >= 8 32 typedef fpos_t Py_off_t; 33 #else 34 #error "Large file support, but neither off_t nor fpos_t is large enough." 35 #endif 36 37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v) 38 39 #define MODE_CLOSED 0 40 #define MODE_READ 1 41 #define MODE_READ_EOF 2 42 #define MODE_WRITE 3 43 44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type) 45 46 47 #ifdef BZ_CONFIG_ERROR 48 49 #if SIZEOF_LONG >= 8 50 #define BZS_TOTAL_OUT(bzs) \ 51 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32) 52 #elif SIZEOF_LONG_LONG >= 8 53 #define BZS_TOTAL_OUT(bzs) \ 54 (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32) 55 #else 56 #define BZS_TOTAL_OUT(bzs) \ 57 bzs->total_out_lo32 58 #endif 59 60 #else /* ! BZ_CONFIG_ERROR */ 61 62 #define BZ2_bzRead bzRead 63 #define BZ2_bzReadOpen bzReadOpen 64 #define BZ2_bzReadClose bzReadClose 65 #define BZ2_bzWrite bzWrite 66 #define BZ2_bzWriteOpen bzWriteOpen 67 #define BZ2_bzWriteClose bzWriteClose 68 #define BZ2_bzCompress bzCompress 69 #define BZ2_bzCompressInit bzCompressInit 70 #define BZ2_bzCompressEnd bzCompressEnd 71 #define BZ2_bzDecompress bzDecompress 72 #define BZ2_bzDecompressInit bzDecompressInit 73 #define BZ2_bzDecompressEnd bzDecompressEnd 74 75 #define BZS_TOTAL_OUT(bzs) bzs->total_out 76 77 #endif /* ! BZ_CONFIG_ERROR */ 78 79 80 #ifdef WITH_THREAD 81 #define ACQUIRE_LOCK(obj) do { \ 82 if (!PyThread_acquire_lock(obj->lock, 0)) { \ 83 Py_BEGIN_ALLOW_THREADS \ 84 PyThread_acquire_lock(obj->lock, 1); \ 85 Py_END_ALLOW_THREADS \ 86 } } while(0) 87 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock) 88 #else 89 #define ACQUIRE_LOCK(obj) 90 #define RELEASE_LOCK(obj) 91 #endif 92 93 /* Bits in f_newlinetypes */ 94 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */ 95 #define NEWLINE_CR 1 /* \r newline seen */ 96 #define NEWLINE_LF 2 /* \n newline seen */ 97 #define NEWLINE_CRLF 4 /* \r\n newline seen */ 98 99 /* ===================================================================== */ 100 /* Structure definitions. */ 101 102 typedef struct { 103 PyObject_HEAD 104 PyObject *file; 105 106 char* f_buf; /* Allocated readahead buffer */ 107 char* f_bufend; /* Points after last occupied position */ 108 char* f_bufptr; /* Current buffer position */ 109 110 int f_softspace; /* Flag used by 'print' command */ 111 112 int f_univ_newline; /* Handle any newline convention */ 113 int f_newlinetypes; /* Types of newlines seen */ 114 int f_skipnextlf; /* Skip next \n */ 115 116 BZFILE *fp; 117 int mode; 118 Py_off_t pos; 119 Py_off_t size; 120 #ifdef WITH_THREAD 121 PyThread_type_lock lock; 122 #endif 123 } BZ2FileObject; 124 125 typedef struct { 126 PyObject_HEAD 127 bz_stream bzs; 128 int running; 129 #ifdef WITH_THREAD 130 PyThread_type_lock lock; 131 #endif 132 } BZ2CompObject; 133 134 typedef struct { 135 PyObject_HEAD 136 bz_stream bzs; 137 int running; 138 PyObject *unused_data; 139 #ifdef WITH_THREAD 140 PyThread_type_lock lock; 141 #endif 142 } BZ2DecompObject; 143 144 /* ===================================================================== */ 145 /* Utility functions. */ 146 147 /* Refuse regular I/O if there's data in the iteration-buffer. 148 * Mixing them would cause data to arrive out of order, as the read* 149 * methods don't use the iteration buffer. */ 150 static int 151 check_iterbuffered(BZ2FileObject *f) 152 { 153 if (f->f_buf != NULL && 154 (f->f_bufend - f->f_bufptr) > 0 && 155 f->f_buf[0] != '\0') { 156 PyErr_SetString(PyExc_ValueError, 157 "Mixing iteration and read methods would lose data"); 158 return -1; 159 } 160 return 0; 161 } 162 163 static int 164 Util_CatchBZ2Error(int bzerror) 165 { 166 int ret = 0; 167 switch(bzerror) { 168 case BZ_OK: 169 case BZ_STREAM_END: 170 break; 171 172 #ifdef BZ_CONFIG_ERROR 173 case BZ_CONFIG_ERROR: 174 PyErr_SetString(PyExc_SystemError, 175 "the bz2 library was not compiled " 176 "correctly"); 177 ret = 1; 178 break; 179 #endif 180 181 case BZ_PARAM_ERROR: 182 PyErr_SetString(PyExc_ValueError, 183 "the bz2 library has received wrong " 184 "parameters"); 185 ret = 1; 186 break; 187 188 case BZ_MEM_ERROR: 189 PyErr_NoMemory(); 190 ret = 1; 191 break; 192 193 case BZ_DATA_ERROR: 194 case BZ_DATA_ERROR_MAGIC: 195 PyErr_SetString(PyExc_IOError, "invalid data stream"); 196 ret = 1; 197 break; 198 199 case BZ_IO_ERROR: 200 PyErr_SetString(PyExc_IOError, "unknown IO error"); 201 ret = 1; 202 break; 203 204 case BZ_UNEXPECTED_EOF: 205 PyErr_SetString(PyExc_EOFError, 206 "compressed file ended before the " 207 "logical end-of-stream was detected"); 208 ret = 1; 209 break; 210 211 case BZ_SEQUENCE_ERROR: 212 PyErr_SetString(PyExc_RuntimeError, 213 "wrong sequence of bz2 library " 214 "commands used"); 215 ret = 1; 216 break; 217 } 218 return ret; 219 } 220 221 #if BUFSIZ < 8192 222 #define SMALLCHUNK 8192 223 #else 224 #define SMALLCHUNK BUFSIZ 225 #endif 226 227 #if SIZEOF_INT < 4 228 #define BIGCHUNK (512 * 32) 229 #else 230 #define BIGCHUNK (512 * 1024) 231 #endif 232 233 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */ 234 static size_t 235 Util_NewBufferSize(size_t currentsize) 236 { 237 if (currentsize > SMALLCHUNK) { 238 /* Keep doubling until we reach BIGCHUNK; 239 then keep adding BIGCHUNK. */ 240 if (currentsize <= BIGCHUNK) 241 return currentsize + currentsize; 242 else 243 return currentsize + BIGCHUNK; 244 } 245 return currentsize + SMALLCHUNK; 246 } 247 248 /* This is a hacked version of Python's fileobject.c:get_line(). */ 249 static PyObject * 250 Util_GetLine(BZ2FileObject *f, int n) 251 { 252 char c; 253 char *buf, *end; 254 size_t total_v_size; /* total # of slots in buffer */ 255 size_t used_v_size; /* # used slots in buffer */ 256 size_t increment; /* amount to increment the buffer */ 257 PyObject *v; 258 int bzerror; 259 int bytes_read; 260 int newlinetypes = f->f_newlinetypes; 261 int skipnextlf = f->f_skipnextlf; 262 int univ_newline = f->f_univ_newline; 263 264 total_v_size = n > 0 ? n : 100; 265 v = PyString_FromStringAndSize((char *)NULL, total_v_size); 266 if (v == NULL) 267 return NULL; 268 269 buf = BUF(v); 270 end = buf + total_v_size; 271 272 for (;;) { 273 Py_BEGIN_ALLOW_THREADS 274 while (buf != end) { 275 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); 276 f->pos++; 277 if (bytes_read == 0) break; 278 if (univ_newline) { 279 if (skipnextlf) { 280 skipnextlf = 0; 281 if (c == '\n') { 282 /* Seeing a \n here with skipnextlf true means we 283 * saw a \r before. 284 */ 285 newlinetypes |= NEWLINE_CRLF; 286 if (bzerror != BZ_OK) break; 287 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); 288 f->pos++; 289 if (bytes_read == 0) break; 290 } else { 291 newlinetypes |= NEWLINE_CR; 292 } 293 } 294 if (c == '\r') { 295 skipnextlf = 1; 296 c = '\n'; 297 } else if (c == '\n') 298 newlinetypes |= NEWLINE_LF; 299 } 300 *buf++ = c; 301 if (bzerror != BZ_OK || c == '\n') break; 302 } 303 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf) 304 newlinetypes |= NEWLINE_CR; 305 Py_END_ALLOW_THREADS 306 f->f_newlinetypes = newlinetypes; 307 f->f_skipnextlf = skipnextlf; 308 if (bzerror == BZ_STREAM_END) { 309 f->size = f->pos; 310 f->mode = MODE_READ_EOF; 311 break; 312 } else if (bzerror != BZ_OK) { 313 Util_CatchBZ2Error(bzerror); 314 Py_DECREF(v); 315 return NULL; 316 } 317 if (c == '\n') 318 break; 319 /* Must be because buf == end */ 320 if (n > 0) 321 break; 322 used_v_size = total_v_size; 323 increment = total_v_size >> 2; /* mild exponential growth */ 324 total_v_size += increment; 325 if (total_v_size > INT_MAX) { 326 PyErr_SetString(PyExc_OverflowError, 327 "line is longer than a Python string can hold"); 328 Py_DECREF(v); 329 return NULL; 330 } 331 if (_PyString_Resize(&v, total_v_size) < 0) 332 return NULL; 333 buf = BUF(v) + used_v_size; 334 end = BUF(v) + total_v_size; 335 } 336 337 used_v_size = buf - BUF(v); 338 if (used_v_size != total_v_size) 339 _PyString_Resize(&v, used_v_size); 340 return v; 341 } 342 343 /* This is a hacked version of Python's 344 * fileobject.c:Py_UniversalNewlineFread(). */ 345 size_t 346 Util_UnivNewlineRead(int *bzerror, BZFILE *stream, 347 char* buf, size_t n, BZ2FileObject *f) 348 { 349 char *dst = buf; 350 int newlinetypes, skipnextlf; 351 352 assert(buf != NULL); 353 assert(stream != NULL); 354 355 if (!f->f_univ_newline) 356 return BZ2_bzRead(bzerror, stream, buf, n); 357 358 newlinetypes = f->f_newlinetypes; 359 skipnextlf = f->f_skipnextlf; 360 361 /* Invariant: n is the number of bytes remaining to be filled 362 * in the buffer. 363 */ 364 while (n) { 365 size_t nread; 366 int shortread; 367 char *src = dst; 368 369 nread = BZ2_bzRead(bzerror, stream, dst, n); 370 assert(nread <= n); 371 n -= nread; /* assuming 1 byte out for each in; will adjust */ 372 shortread = n != 0; /* true iff EOF or error */ 373 while (nread--) { 374 char c = *src++; 375 if (c == '\r') { 376 /* Save as LF and set flag to skip next LF. */ 377 *dst++ = '\n'; 378 skipnextlf = 1; 379 } 380 else if (skipnextlf && c == '\n') { 381 /* Skip LF, and remember we saw CR LF. */ 382 skipnextlf = 0; 383 newlinetypes |= NEWLINE_CRLF; 384 ++n; 385 } 386 else { 387 /* Normal char to be stored in buffer. Also 388 * update the newlinetypes flag if either this 389 * is an LF or the previous char was a CR. 390 */ 391 if (c == '\n') 392 newlinetypes |= NEWLINE_LF; 393 else if (skipnextlf) 394 newlinetypes |= NEWLINE_CR; 395 *dst++ = c; 396 skipnextlf = 0; 397 } 398 } 399 if (shortread) { 400 /* If this is EOF, update type flags. */ 401 if (skipnextlf && *bzerror == BZ_STREAM_END) 402 newlinetypes |= NEWLINE_CR; 403 break; 404 } 405 } 406 f->f_newlinetypes = newlinetypes; 407 f->f_skipnextlf = skipnextlf; 408 return dst - buf; 409 } 410 411 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */ 412 static void 413 Util_DropReadAhead(BZ2FileObject *f) 414 { 415 if (f->f_buf != NULL) { 416 PyMem_Free(f->f_buf); 417 f->f_buf = NULL; 418 } 419 } 420 421 /* This is a hacked version of Python's fileobject.c:readahead(). */ 422 static int 423 Util_ReadAhead(BZ2FileObject *f, int bufsize) 424 { 425 int chunksize; 426 int bzerror; 427 428 if (f->f_buf != NULL) { 429 if((f->f_bufend - f->f_bufptr) >= 1) 430 return 0; 431 else 432 Util_DropReadAhead(f); 433 } 434 if (f->mode == MODE_READ_EOF) { 435 f->f_bufptr = f->f_buf; 436 f->f_bufend = f->f_buf; 437 return 0; 438 } 439 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { 440 PyErr_NoMemory(); 441 return -1; 442 } 443 Py_BEGIN_ALLOW_THREADS 444 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf, 445 bufsize, f); 446 Py_END_ALLOW_THREADS 447 f->pos += chunksize; 448 if (bzerror == BZ_STREAM_END) { 449 f->size = f->pos; 450 f->mode = MODE_READ_EOF; 451 } else if (bzerror != BZ_OK) { 452 Util_CatchBZ2Error(bzerror); 453 Util_DropReadAhead(f); 454 return -1; 455 } 456 f->f_bufptr = f->f_buf; 457 f->f_bufend = f->f_buf + chunksize; 458 return 0; 459 } 460 461 /* This is a hacked version of Python's 462 * fileobject.c:readahead_get_line_skip(). */ 463 static PyStringObject * 464 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize) 465 { 466 PyStringObject* s; 467 char *bufptr; 468 char *buf; 469 int len; 470 471 if (f->f_buf == NULL) 472 if (Util_ReadAhead(f, bufsize) < 0) 473 return NULL; 474 475 len = f->f_bufend - f->f_bufptr; 476 if (len == 0) 477 return (PyStringObject *) 478 PyString_FromStringAndSize(NULL, skip); 479 bufptr = memchr(f->f_bufptr, '\n', len); 480 if (bufptr != NULL) { 481 bufptr++; /* Count the '\n' */ 482 len = bufptr - f->f_bufptr; 483 s = (PyStringObject *) 484 PyString_FromStringAndSize(NULL, skip+len); 485 if (s == NULL) 486 return NULL; 487 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); 488 f->f_bufptr = bufptr; 489 if (bufptr == f->f_bufend) 490 Util_DropReadAhead(f); 491 } else { 492 bufptr = f->f_bufptr; 493 buf = f->f_buf; 494 f->f_buf = NULL; /* Force new readahead buffer */ 495 s = Util_ReadAheadGetLineSkip(f, skip+len, 496 bufsize + (bufsize>>2)); 497 if (s == NULL) { 498 PyMem_Free(buf); 499 return NULL; 500 } 501 memcpy(PyString_AS_STRING(s)+skip, bufptr, len); 502 PyMem_Free(buf); 503 } 504 return s; 505 } 506 507 /* ===================================================================== */ 508 /* Methods of BZ2File. */ 509 510 PyDoc_STRVAR(BZ2File_read__doc__, 511 "read([size]) -> string\n\ 512 \n\ 513 Read at most size uncompressed bytes, returned as a string. If the size\n\ 514 argument is negative or omitted, read until EOF is reached.\n\ 515 "); 516 517 /* This is a hacked version of Python's fileobject.c:file_read(). */ 518 static PyObject * 519 BZ2File_read(BZ2FileObject *self, PyObject *args) 520 { 521 long bytesrequested = -1; 522 size_t bytesread, buffersize, chunksize; 523 int bzerror; 524 PyObject *ret = NULL; 525 526 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested)) 527 return NULL; 528 529 ACQUIRE_LOCK(self); 530 switch (self->mode) { 531 case MODE_READ: 532 break; 533 case MODE_READ_EOF: 534 ret = PyString_FromString(""); 535 goto cleanup; 536 case MODE_CLOSED: 537 PyErr_SetString(PyExc_ValueError, 538 "I/O operation on closed file"); 539 goto cleanup; 540 default: 541 PyErr_SetString(PyExc_IOError, 542 "file is not ready for reading"); 543 goto cleanup; 544 } 545 546 /* refuse to mix with f.next() */ 547 if (check_iterbuffered(self)) 548 goto cleanup; 549 550 if (bytesrequested < 0) 551 buffersize = Util_NewBufferSize((size_t)0); 552 else 553 buffersize = bytesrequested; 554 if (buffersize > INT_MAX) { 555 PyErr_SetString(PyExc_OverflowError, 556 "requested number of bytes is " 557 "more than a Python string can hold"); 558 goto cleanup; 559 } 560 ret = PyString_FromStringAndSize((char *)NULL, buffersize); 561 if (ret == NULL) 562 goto cleanup; 563 bytesread = 0; 564 565 for (;;) { 566 Py_BEGIN_ALLOW_THREADS 567 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, 568 BUF(ret)+bytesread, 569 buffersize-bytesread, 570 self); 571 self->pos += chunksize; 572 Py_END_ALLOW_THREADS 573 bytesread += chunksize; 574 if (bzerror == BZ_STREAM_END) { 575 self->size = self->pos; 576 self->mode = MODE_READ_EOF; 577 break; 578 } else if (bzerror != BZ_OK) { 579 Util_CatchBZ2Error(bzerror); 580 Py_DECREF(ret); 581 ret = NULL; 582 goto cleanup; 583 } 584 if (bytesrequested < 0) { 585 buffersize = Util_NewBufferSize(buffersize); 586 if (_PyString_Resize(&ret, buffersize) < 0) 587 goto cleanup; 588 } else { 589 break; 590 } 591 } 592 if (bytesread != buffersize) 593 _PyString_Resize(&ret, bytesread); 594 595 cleanup: 596 RELEASE_LOCK(self); 597 return ret; 598 } 599 600 PyDoc_STRVAR(BZ2File_readline__doc__, 601 "readline([size]) -> string\n\ 602 \n\ 603 Return the next line from the file, as a string, retaining newline.\n\ 604 A non-negative size argument will limit the maximum number of bytes to\n\ 605 return (an incomplete line may be returned then). Return an empty\n\ 606 string at EOF.\n\ 607 "); 608 609 static PyObject * 610 BZ2File_readline(BZ2FileObject *self, PyObject *args) 611 { 612 PyObject *ret = NULL; 613 int sizehint = -1; 614 615 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint)) 616 return NULL; 617 618 ACQUIRE_LOCK(self); 619 switch (self->mode) { 620 case MODE_READ: 621 break; 622 case MODE_READ_EOF: 623 ret = PyString_FromString(""); 624 goto cleanup; 625 case MODE_CLOSED: 626 PyErr_SetString(PyExc_ValueError, 627 "I/O operation on closed file"); 628 goto cleanup; 629 default: 630 PyErr_SetString(PyExc_IOError, 631 "file is not ready for reading"); 632 goto cleanup; 633 } 634 635 /* refuse to mix with f.next() */ 636 if (check_iterbuffered(self)) 637 goto cleanup; 638 639 if (sizehint == 0) 640 ret = PyString_FromString(""); 641 else 642 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint); 643 644 cleanup: 645 RELEASE_LOCK(self); 646 return ret; 647 } 648 649 PyDoc_STRVAR(BZ2File_readlines__doc__, 650 "readlines([size]) -> list\n\ 651 \n\ 652 Call readline() repeatedly and return a list of lines read.\n\ 653 The optional size argument, if given, is an approximate bound on the\n\ 654 total number of bytes in the lines returned.\n\ 655 "); 656 657 /* This is a hacked version of Python's fileobject.c:file_readlines(). */ 658 static PyObject * 659 BZ2File_readlines(BZ2FileObject *self, PyObject *args) 660 { 661 long sizehint = 0; 662 PyObject *list = NULL; 663 PyObject *line; 664 char small_buffer[SMALLCHUNK]; 665 char *buffer = small_buffer; 666 size_t buffersize = SMALLCHUNK; 667 PyObject *big_buffer = NULL; 668 size_t nfilled = 0; 669 size_t nread; 670 size_t totalread = 0; 671 char *p, *q, *end; 672 int err; 673 int shortread = 0; 674 int bzerror; 675 676 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint)) 677 return NULL; 678 679 ACQUIRE_LOCK(self); 680 switch (self->mode) { 681 case MODE_READ: 682 break; 683 case MODE_READ_EOF: 684 list = PyList_New(0); 685 goto cleanup; 686 case MODE_CLOSED: 687 PyErr_SetString(PyExc_ValueError, 688 "I/O operation on closed file"); 689 goto cleanup; 690 default: 691 PyErr_SetString(PyExc_IOError, 692 "file is not ready for reading"); 693 goto cleanup; 694 } 695 696 /* refuse to mix with f.next() */ 697 if (check_iterbuffered(self)) 698 goto cleanup; 699 700 if ((list = PyList_New(0)) == NULL) 701 goto cleanup; 702 703 for (;;) { 704 Py_BEGIN_ALLOW_THREADS 705 nread = Util_UnivNewlineRead(&bzerror, self->fp, 706 buffer+nfilled, 707 buffersize-nfilled, self); 708 self->pos += nread; 709 Py_END_ALLOW_THREADS 710 if (bzerror == BZ_STREAM_END) { 711 self->size = self->pos; 712 self->mode = MODE_READ_EOF; 713 if (nread == 0) { 714 sizehint = 0; 715 break; 716 } 717 shortread = 1; 718 } else if (bzerror != BZ_OK) { 719 Util_CatchBZ2Error(bzerror); 720 error: 721 Py_DECREF(list); 722 list = NULL; 723 goto cleanup; 724 } 725 totalread += nread; 726 p = memchr(buffer+nfilled, '\n', nread); 727 if (!shortread && p == NULL) { 728 /* Need a larger buffer to fit this line */ 729 nfilled += nread; 730 buffersize *= 2; 731 if (buffersize > INT_MAX) { 732 PyErr_SetString(PyExc_OverflowError, 733 "line is longer than a Python string can hold"); 734 goto error; 735 } 736 if (big_buffer == NULL) { 737 /* Create the big buffer */ 738 big_buffer = PyString_FromStringAndSize( 739 NULL, buffersize); 740 if (big_buffer == NULL) 741 goto error; 742 buffer = PyString_AS_STRING(big_buffer); 743 memcpy(buffer, small_buffer, nfilled); 744 } 745 else { 746 /* Grow the big buffer */ 747 _PyString_Resize(&big_buffer, buffersize); 748 buffer = PyString_AS_STRING(big_buffer); 749 } 750 continue; 751 } 752 end = buffer+nfilled+nread; 753 q = buffer; 754 while (p != NULL) { 755 /* Process complete lines */ 756 p++; 757 line = PyString_FromStringAndSize(q, p-q); 758 if (line == NULL) 759 goto error; 760 err = PyList_Append(list, line); 761 Py_DECREF(line); 762 if (err != 0) 763 goto error; 764 q = p; 765 p = memchr(q, '\n', end-q); 766 } 767 /* Move the remaining incomplete line to the start */ 768 nfilled = end-q; 769 memmove(buffer, q, nfilled); 770 if (sizehint > 0) 771 if (totalread >= (size_t)sizehint) 772 break; 773 if (shortread) { 774 sizehint = 0; 775 break; 776 } 777 } 778 if (nfilled != 0) { 779 /* Partial last line */ 780 line = PyString_FromStringAndSize(buffer, nfilled); 781 if (line == NULL) 782 goto error; 783 if (sizehint > 0) { 784 /* Need to complete the last line */ 785 PyObject *rest = Util_GetLine(self, 0); 786 if (rest == NULL) { 787 Py_DECREF(line); 788 goto error; 789 } 790 PyString_Concat(&line, rest); 791 Py_DECREF(rest); 792 if (line == NULL) 793 goto error; 794 } 795 err = PyList_Append(list, line); 796 Py_DECREF(line); 797 if (err != 0) 798 goto error; 799 } 800 801 cleanup: 802 RELEASE_LOCK(self); 803 if (big_buffer) { 804 Py_DECREF(big_buffer); 805 } 806 return list; 807 } 808 809 PyDoc_STRVAR(BZ2File_xreadlines__doc__, 810 "xreadlines() -> self\n\ 811 \n\ 812 For backward compatibility. BZ2File objects now include the performance\n\ 813 optimizations previously implemented in the xreadlines module.\n\ 814 "); 815 816 PyDoc_STRVAR(BZ2File_write__doc__, 817 "write(data) -> None\n\ 818 \n\ 819 Write the 'data' string to file. Note that due to buffering, close() may\n\ 820 be needed before the file on disk reflects the data written.\n\ 821 "); 822 823 /* This is a hacked version of Python's fileobject.c:file_write(). */ 824 static PyObject * 825 BZ2File_write(BZ2FileObject *self, PyObject *args) 826 { 827 PyObject *ret = NULL; 828 Py_buffer pbuf; 829 char *buf; 830 int len; 831 int bzerror; 832 833 if (!PyArg_ParseTuple(args, "s*:write", &pbuf)) 834 return NULL; 835 buf = pbuf.buf; 836 len = pbuf.len; 837 838 ACQUIRE_LOCK(self); 839 switch (self->mode) { 840 case MODE_WRITE: 841 break; 842 843 case MODE_CLOSED: 844 PyErr_SetString(PyExc_ValueError, 845 "I/O operation on closed file"); 846 goto cleanup; 847 848 default: 849 PyErr_SetString(PyExc_IOError, 850 "file is not ready for writing"); 851 goto cleanup; 852 } 853 854 self->f_softspace = 0; 855 856 Py_BEGIN_ALLOW_THREADS 857 BZ2_bzWrite (&bzerror, self->fp, buf, len); 858 self->pos += len; 859 Py_END_ALLOW_THREADS 860 861 if (bzerror != BZ_OK) { 862 Util_CatchBZ2Error(bzerror); 863 goto cleanup; 864 } 865 866 Py_INCREF(Py_None); 867 ret = Py_None; 868 869 cleanup: 870 PyBuffer_Release(&pbuf); 871 RELEASE_LOCK(self); 872 return ret; 873 } 874 875 PyDoc_STRVAR(BZ2File_writelines__doc__, 876 "writelines(sequence_of_strings) -> None\n\ 877 \n\ 878 Write the sequence of strings to the file. Note that newlines are not\n\ 879 added. The sequence can be any iterable object producing strings. This is\n\ 880 equivalent to calling write() for each string.\n\ 881 "); 882 883 /* This is a hacked version of Python's fileobject.c:file_writelines(). */ 884 static PyObject * 885 BZ2File_writelines(BZ2FileObject *self, PyObject *seq) 886 { 887 #define CHUNKSIZE 1000 888 PyObject *list = NULL; 889 PyObject *iter = NULL; 890 PyObject *ret = NULL; 891 PyObject *line; 892 int i, j, index, len, islist; 893 int bzerror; 894 895 ACQUIRE_LOCK(self); 896 switch (self->mode) { 897 case MODE_WRITE: 898 break; 899 900 case MODE_CLOSED: 901 PyErr_SetString(PyExc_ValueError, 902 "I/O operation on closed file"); 903 goto error; 904 905 default: 906 PyErr_SetString(PyExc_IOError, 907 "file is not ready for writing"); 908 goto error; 909 } 910 911 islist = PyList_Check(seq); 912 if (!islist) { 913 iter = PyObject_GetIter(seq); 914 if (iter == NULL) { 915 PyErr_SetString(PyExc_TypeError, 916 "writelines() requires an iterable argument"); 917 goto error; 918 } 919 list = PyList_New(CHUNKSIZE); 920 if (list == NULL) 921 goto error; 922 } 923 924 /* Strategy: slurp CHUNKSIZE lines into a private list, 925 checking that they are all strings, then write that list 926 without holding the interpreter lock, then come back for more. */ 927 for (index = 0; ; index += CHUNKSIZE) { 928 if (islist) { 929 Py_XDECREF(list); 930 list = PyList_GetSlice(seq, index, index+CHUNKSIZE); 931 if (list == NULL) 932 goto error; 933 j = PyList_GET_SIZE(list); 934 } 935 else { 936 for (j = 0; j < CHUNKSIZE; j++) { 937 line = PyIter_Next(iter); 938 if (line == NULL) { 939 if (PyErr_Occurred()) 940 goto error; 941 break; 942 } 943 PyList_SetItem(list, j, line); 944 } 945 } 946 if (j == 0) 947 break; 948 949 /* Check that all entries are indeed strings. If not, 950 apply the same rules as for file.write() and 951 convert the rets to strings. This is slow, but 952 seems to be the only way since all conversion APIs 953 could potentially execute Python code. */ 954 for (i = 0; i < j; i++) { 955 PyObject *v = PyList_GET_ITEM(list, i); 956 if (!PyString_Check(v)) { 957 const char *buffer; 958 Py_ssize_t len; 959 if (PyObject_AsCharBuffer(v, &buffer, &len)) { 960 PyErr_SetString(PyExc_TypeError, 961 "writelines() " 962 "argument must be " 963 "a sequence of " 964 "strings"); 965 goto error; 966 } 967 line = PyString_FromStringAndSize(buffer, 968 len); 969 if (line == NULL) 970 goto error; 971 Py_DECREF(v); 972 PyList_SET_ITEM(list, i, line); 973 } 974 } 975 976 self->f_softspace = 0; 977 978 /* Since we are releasing the global lock, the 979 following code may *not* execute Python code. */ 980 Py_BEGIN_ALLOW_THREADS 981 for (i = 0; i < j; i++) { 982 line = PyList_GET_ITEM(list, i); 983 len = PyString_GET_SIZE(line); 984 BZ2_bzWrite (&bzerror, self->fp, 985 PyString_AS_STRING(line), len); 986 if (bzerror != BZ_OK) { 987 Py_BLOCK_THREADS 988 Util_CatchBZ2Error(bzerror); 989 goto error; 990 } 991 } 992 Py_END_ALLOW_THREADS 993 994 if (j < CHUNKSIZE) 995 break; 996 } 997 998 Py_INCREF(Py_None); 999 ret = Py_None; 1000 1001 error: 1002 RELEASE_LOCK(self); 1003 Py_XDECREF(list); 1004 Py_XDECREF(iter); 1005 return ret; 1006 #undef CHUNKSIZE 1007 } 1008 1009 PyDoc_STRVAR(BZ2File_seek__doc__, 1010 "seek(offset [, whence]) -> None\n\ 1011 \n\ 1012 Move to new file position. Argument offset is a byte count. Optional\n\ 1013 argument whence defaults to 0 (offset from start of file, offset\n\ 1014 should be >= 0); other values are 1 (move relative to current position,\n\ 1015 positive or negative), and 2 (move relative to end of file, usually\n\ 1016 negative, although many platforms allow seeking beyond the end of a file).\n\ 1017 \n\ 1018 Note that seeking of bz2 files is emulated, and depending on the parameters\n\ 1019 the operation may be extremely slow.\n\ 1020 "); 1021 1022 static PyObject * 1023 BZ2File_seek(BZ2FileObject *self, PyObject *args) 1024 { 1025 int where = 0; 1026 PyObject *offobj; 1027 Py_off_t offset; 1028 char small_buffer[SMALLCHUNK]; 1029 char *buffer = small_buffer; 1030 size_t buffersize = SMALLCHUNK; 1031 Py_off_t bytesread = 0; 1032 size_t readsize; 1033 int chunksize; 1034 int bzerror; 1035 PyObject *ret = NULL; 1036 1037 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where)) 1038 return NULL; 1039 #if !defined(HAVE_LARGEFILE_SUPPORT) 1040 offset = PyInt_AsLong(offobj); 1041 #else 1042 offset = PyLong_Check(offobj) ? 1043 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj); 1044 #endif 1045 if (PyErr_Occurred()) 1046 return NULL; 1047 1048 ACQUIRE_LOCK(self); 1049 Util_DropReadAhead(self); 1050 switch (self->mode) { 1051 case MODE_READ: 1052 case MODE_READ_EOF: 1053 break; 1054 1055 case MODE_CLOSED: 1056 PyErr_SetString(PyExc_ValueError, 1057 "I/O operation on closed file"); 1058 goto cleanup; 1059 1060 default: 1061 PyErr_SetString(PyExc_IOError, 1062 "seek works only while reading"); 1063 goto cleanup; 1064 } 1065 1066 if (where == 2) { 1067 if (self->size == -1) { 1068 assert(self->mode != MODE_READ_EOF); 1069 for (;;) { 1070 Py_BEGIN_ALLOW_THREADS 1071 chunksize = Util_UnivNewlineRead( 1072 &bzerror, self->fp, 1073 buffer, buffersize, 1074 self); 1075 self->pos += chunksize; 1076 Py_END_ALLOW_THREADS 1077 1078 bytesread += chunksize; 1079 if (bzerror == BZ_STREAM_END) { 1080 break; 1081 } else if (bzerror != BZ_OK) { 1082 Util_CatchBZ2Error(bzerror); 1083 goto cleanup; 1084 } 1085 } 1086 self->mode = MODE_READ_EOF; 1087 self->size = self->pos; 1088 bytesread = 0; 1089 } 1090 offset = self->size + offset; 1091 } else if (where == 1) { 1092 offset = self->pos + offset; 1093 } 1094 1095 /* Before getting here, offset must be the absolute position the file 1096 * pointer should be set to. */ 1097 1098 if (offset >= self->pos) { 1099 /* we can move forward */ 1100 offset -= self->pos; 1101 } else { 1102 /* we cannot move back, so rewind the stream */ 1103 BZ2_bzReadClose(&bzerror, self->fp); 1104 if (self->fp) { 1105 PyFile_DecUseCount((PyFileObject *)self->file); 1106 self->fp = NULL; 1107 } 1108 if (bzerror != BZ_OK) { 1109 Util_CatchBZ2Error(bzerror); 1110 goto cleanup; 1111 } 1112 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0); 1113 if (!ret) 1114 goto cleanup; 1115 Py_DECREF(ret); 1116 ret = NULL; 1117 self->pos = 0; 1118 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file), 1119 0, 0, NULL, 0); 1120 if (self->fp) 1121 PyFile_IncUseCount((PyFileObject *)self->file); 1122 if (bzerror != BZ_OK) { 1123 Util_CatchBZ2Error(bzerror); 1124 goto cleanup; 1125 } 1126 self->mode = MODE_READ; 1127 } 1128 1129 if (offset <= 0 || self->mode == MODE_READ_EOF) 1130 goto exit; 1131 1132 /* Before getting here, offset must be set to the number of bytes 1133 * to walk forward. */ 1134 for (;;) { 1135 if (offset-bytesread > buffersize) 1136 readsize = buffersize; 1137 else 1138 /* offset might be wider that readsize, but the result 1139 * of the subtraction is bound by buffersize (see the 1140 * condition above). buffersize is 8192. */ 1141 readsize = (size_t)(offset-bytesread); 1142 Py_BEGIN_ALLOW_THREADS 1143 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, 1144 buffer, readsize, self); 1145 self->pos += chunksize; 1146 Py_END_ALLOW_THREADS 1147 bytesread += chunksize; 1148 if (bzerror == BZ_STREAM_END) { 1149 self->size = self->pos; 1150 self->mode = MODE_READ_EOF; 1151 break; 1152 } else if (bzerror != BZ_OK) { 1153 Util_CatchBZ2Error(bzerror); 1154 goto cleanup; 1155 } 1156 if (bytesread == offset) 1157 break; 1158 } 1159 1160 exit: 1161 Py_INCREF(Py_None); 1162 ret = Py_None; 1163 1164 cleanup: 1165 RELEASE_LOCK(self); 1166 return ret; 1167 } 1168 1169 PyDoc_STRVAR(BZ2File_tell__doc__, 1170 "tell() -> int\n\ 1171 \n\ 1172 Return the current file position, an integer (may be a long integer).\n\ 1173 "); 1174 1175 static PyObject * 1176 BZ2File_tell(BZ2FileObject *self, PyObject *args) 1177 { 1178 PyObject *ret = NULL; 1179 1180 if (self->mode == MODE_CLOSED) { 1181 PyErr_SetString(PyExc_ValueError, 1182 "I/O operation on closed file"); 1183 goto cleanup; 1184 } 1185 1186 #if !defined(HAVE_LARGEFILE_SUPPORT) 1187 ret = PyInt_FromLong(self->pos); 1188 #else 1189 ret = PyLong_FromLongLong(self->pos); 1190 #endif 1191 1192 cleanup: 1193 return ret; 1194 } 1195 1196 PyDoc_STRVAR(BZ2File_close__doc__, 1197 "close() -> None or (perhaps) an integer\n\ 1198 \n\ 1199 Close the file. Sets data attribute .closed to true. A closed file\n\ 1200 cannot be used for further I/O operations. close() may be called more\n\ 1201 than once without error.\n\ 1202 "); 1203 1204 static PyObject * 1205 BZ2File_close(BZ2FileObject *self) 1206 { 1207 PyObject *ret = NULL; 1208 int bzerror = BZ_OK; 1209 1210 ACQUIRE_LOCK(self); 1211 switch (self->mode) { 1212 case MODE_READ: 1213 case MODE_READ_EOF: 1214 BZ2_bzReadClose(&bzerror, self->fp); 1215 break; 1216 case MODE_WRITE: 1217 BZ2_bzWriteClose(&bzerror, self->fp, 1218 0, NULL, NULL); 1219 break; 1220 } 1221 if (self->fp) { 1222 PyFile_DecUseCount((PyFileObject *)self->file); 1223 self->fp = NULL; 1224 } 1225 self->mode = MODE_CLOSED; 1226 ret = PyObject_CallMethod(self->file, "close", NULL); 1227 if (bzerror != BZ_OK) { 1228 Util_CatchBZ2Error(bzerror); 1229 Py_XDECREF(ret); 1230 ret = NULL; 1231 } 1232 1233 RELEASE_LOCK(self); 1234 return ret; 1235 } 1236 1237 PyDoc_STRVAR(BZ2File_enter_doc, 1238 "__enter__() -> self."); 1239 1240 static PyObject * 1241 BZ2File_enter(BZ2FileObject *self) 1242 { 1243 if (self->mode == MODE_CLOSED) { 1244 PyErr_SetString(PyExc_ValueError, 1245 "I/O operation on closed file"); 1246 return NULL; 1247 } 1248 Py_INCREF(self); 1249 return (PyObject *) self; 1250 } 1251 1252 PyDoc_STRVAR(BZ2File_exit_doc, 1253 "__exit__(*excinfo) -> None. Closes the file."); 1254 1255 static PyObject * 1256 BZ2File_exit(BZ2FileObject *self, PyObject *args) 1257 { 1258 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL); 1259 if (!ret) 1260 /* If error occurred, pass through */ 1261 return NULL; 1262 Py_DECREF(ret); 1263 Py_RETURN_NONE; 1264 } 1265 1266 1267 static PyObject *BZ2File_getiter(BZ2FileObject *self); 1268 1269 static PyMethodDef BZ2File_methods[] = { 1270 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__}, 1271 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__}, 1272 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__}, 1273 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__}, 1274 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__}, 1275 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__}, 1276 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__}, 1277 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__}, 1278 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__}, 1279 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc}, 1280 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc}, 1281 {NULL, NULL} /* sentinel */ 1282 }; 1283 1284 1285 /* ===================================================================== */ 1286 /* Getters and setters of BZ2File. */ 1287 1288 /* This is a hacked version of Python's fileobject.c:get_newlines(). */ 1289 static PyObject * 1290 BZ2File_get_newlines(BZ2FileObject *self, void *closure) 1291 { 1292 switch (self->f_newlinetypes) { 1293 case NEWLINE_UNKNOWN: 1294 Py_INCREF(Py_None); 1295 return Py_None; 1296 case NEWLINE_CR: 1297 return PyString_FromString("\r"); 1298 case NEWLINE_LF: 1299 return PyString_FromString("\n"); 1300 case NEWLINE_CR|NEWLINE_LF: 1301 return Py_BuildValue("(ss)", "\r", "\n"); 1302 case NEWLINE_CRLF: 1303 return PyString_FromString("\r\n"); 1304 case NEWLINE_CR|NEWLINE_CRLF: 1305 return Py_BuildValue("(ss)", "\r", "\r\n"); 1306 case NEWLINE_LF|NEWLINE_CRLF: 1307 return Py_BuildValue("(ss)", "\n", "\r\n"); 1308 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF: 1309 return Py_BuildValue("(sss)", "\r", "\n", "\r\n"); 1310 default: 1311 PyErr_Format(PyExc_SystemError, 1312 "Unknown newlines value 0x%x\n", 1313 self->f_newlinetypes); 1314 return NULL; 1315 } 1316 } 1317 1318 static PyObject * 1319 BZ2File_get_closed(BZ2FileObject *self, void *closure) 1320 { 1321 return PyInt_FromLong(self->mode == MODE_CLOSED); 1322 } 1323 1324 static PyObject * 1325 BZ2File_get_mode(BZ2FileObject *self, void *closure) 1326 { 1327 return PyObject_GetAttrString(self->file, "mode"); 1328 } 1329 1330 static PyObject * 1331 BZ2File_get_name(BZ2FileObject *self, void *closure) 1332 { 1333 return PyObject_GetAttrString(self->file, "name"); 1334 } 1335 1336 static PyGetSetDef BZ2File_getset[] = { 1337 {"closed", (getter)BZ2File_get_closed, NULL, 1338 "True if the file is closed"}, 1339 {"newlines", (getter)BZ2File_get_newlines, NULL, 1340 "end-of-line convention used in this file"}, 1341 {"mode", (getter)BZ2File_get_mode, NULL, 1342 "file mode ('r', 'w', or 'U')"}, 1343 {"name", (getter)BZ2File_get_name, NULL, 1344 "file name"}, 1345 {NULL} /* Sentinel */ 1346 }; 1347 1348 1349 /* ===================================================================== */ 1350 /* Members of BZ2File_Type. */ 1351 1352 #undef OFF 1353 #define OFF(x) offsetof(BZ2FileObject, x) 1354 1355 static PyMemberDef BZ2File_members[] = { 1356 {"softspace", T_INT, OFF(f_softspace), 0, 1357 "flag indicating that a space needs to be printed; used by print"}, 1358 {NULL} /* Sentinel */ 1359 }; 1360 1361 /* ===================================================================== */ 1362 /* Slot definitions for BZ2File_Type. */ 1363 1364 static int 1365 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) 1366 { 1367 static char *kwlist[] = {"filename", "mode", "buffering", 1368 "compresslevel", 0}; 1369 PyObject *name; 1370 char *mode = "r"; 1371 int buffering = -1; 1372 int compresslevel = 9; 1373 int bzerror; 1374 int mode_char = 0; 1375 1376 self->size = -1; 1377 1378 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File", 1379 kwlist, &name, &mode, &buffering, 1380 &compresslevel)) 1381 return -1; 1382 1383 if (compresslevel < 1 || compresslevel > 9) { 1384 PyErr_SetString(PyExc_ValueError, 1385 "compresslevel must be between 1 and 9"); 1386 return -1; 1387 } 1388 1389 for (;;) { 1390 int error = 0; 1391 switch (*mode) { 1392 case 'r': 1393 case 'w': 1394 if (mode_char) 1395 error = 1; 1396 mode_char = *mode; 1397 break; 1398 1399 case 'b': 1400 break; 1401 1402 case 'U': 1403 #ifdef __VMS 1404 self->f_univ_newline = 0; 1405 #else 1406 self->f_univ_newline = 1; 1407 #endif 1408 break; 1409 1410 default: 1411 error = 1; 1412 break; 1413 } 1414 if (error) { 1415 PyErr_Format(PyExc_ValueError, 1416 "invalid mode char %c", *mode); 1417 return -1; 1418 } 1419 mode++; 1420 if (*mode == '\0') 1421 break; 1422 } 1423 1424 if (mode_char == 0) { 1425 mode_char = 'r'; 1426 } 1427 1428 mode = (mode_char == 'r') ? "rb" : "wb"; 1429 1430 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)", 1431 name, mode, buffering); 1432 if (self->file == NULL) 1433 return -1; 1434 1435 /* From now on, we have stuff to dealloc, so jump to error label 1436 * instead of returning */ 1437 1438 #ifdef WITH_THREAD 1439 self->lock = PyThread_allocate_lock(); 1440 if (!self->lock) { 1441 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); 1442 goto error; 1443 } 1444 #endif 1445 1446 if (mode_char == 'r') 1447 self->fp = BZ2_bzReadOpen(&bzerror, 1448 PyFile_AsFile(self->file), 1449 0, 0, NULL, 0); 1450 else 1451 self->fp = BZ2_bzWriteOpen(&bzerror, 1452 PyFile_AsFile(self->file), 1453 compresslevel, 0, 0); 1454 1455 if (bzerror != BZ_OK) { 1456 Util_CatchBZ2Error(bzerror); 1457 goto error; 1458 } 1459 PyFile_IncUseCount((PyFileObject *)self->file); 1460 1461 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE; 1462 1463 return 0; 1464 1465 error: 1466 Py_CLEAR(self->file); 1467 #ifdef WITH_THREAD 1468 if (self->lock) { 1469 PyThread_free_lock(self->lock); 1470 self->lock = NULL; 1471 } 1472 #endif 1473 return -1; 1474 } 1475 1476 static void 1477 BZ2File_dealloc(BZ2FileObject *self) 1478 { 1479 int bzerror; 1480 #ifdef WITH_THREAD 1481 if (self->lock) 1482 PyThread_free_lock(self->lock); 1483 #endif 1484 switch (self->mode) { 1485 case MODE_READ: 1486 case MODE_READ_EOF: 1487 BZ2_bzReadClose(&bzerror, self->fp); 1488 break; 1489 case MODE_WRITE: 1490 BZ2_bzWriteClose(&bzerror, self->fp, 1491 0, NULL, NULL); 1492 break; 1493 } 1494 if (self->fp) { 1495 PyFile_DecUseCount((PyFileObject *)self->file); 1496 self->fp = NULL; 1497 } 1498 Util_DropReadAhead(self); 1499 Py_XDECREF(self->file); 1500 Py_TYPE(self)->tp_free((PyObject *)self); 1501 } 1502 1503 /* This is a hacked version of Python's fileobject.c:file_getiter(). */ 1504 static PyObject * 1505 BZ2File_getiter(BZ2FileObject *self) 1506 { 1507 if (self->mode == MODE_CLOSED) { 1508 PyErr_SetString(PyExc_ValueError, 1509 "I/O operation on closed file"); 1510 return NULL; 1511 } 1512 Py_INCREF((PyObject*)self); 1513 return (PyObject *)self; 1514 } 1515 1516 /* This is a hacked version of Python's fileobject.c:file_iternext(). */ 1517 #define READAHEAD_BUFSIZE 8192 1518 static PyObject * 1519 BZ2File_iternext(BZ2FileObject *self) 1520 { 1521 PyStringObject* ret; 1522 ACQUIRE_LOCK(self); 1523 if (self->mode == MODE_CLOSED) { 1524 RELEASE_LOCK(self); 1525 PyErr_SetString(PyExc_ValueError, 1526 "I/O operation on closed file"); 1527 return NULL; 1528 } 1529 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE); 1530 RELEASE_LOCK(self); 1531 if (ret == NULL || PyString_GET_SIZE(ret) == 0) { 1532 Py_XDECREF(ret); 1533 return NULL; 1534 } 1535 return (PyObject *)ret; 1536 } 1537 1538 /* ===================================================================== */ 1539 /* BZ2File_Type definition. */ 1540 1541 PyDoc_VAR(BZ2File__doc__) = 1542 PyDoc_STR( 1543 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\ 1544 \n\ 1545 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\ 1546 writing. When opened for writing, the file will be created if it doesn't\n\ 1547 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\ 1548 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\ 1549 is given, must be a number between 1 and 9.\n\ 1550 ") 1551 PyDoc_STR( 1552 "\n\ 1553 Add a 'U' to mode to open the file for input with universal newline\n\ 1554 support. Any line ending in the input file will be seen as a '\\n' in\n\ 1555 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\ 1556 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\ 1557 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\ 1558 newlines are available only when reading.\n\ 1559 ") 1560 ; 1561 1562 static PyTypeObject BZ2File_Type = { 1563 PyVarObject_HEAD_INIT(NULL, 0) 1564 "bz2.BZ2File", /*tp_name*/ 1565 sizeof(BZ2FileObject), /*tp_basicsize*/ 1566 0, /*tp_itemsize*/ 1567 (destructor)BZ2File_dealloc, /*tp_dealloc*/ 1568 0, /*tp_print*/ 1569 0, /*tp_getattr*/ 1570 0, /*tp_setattr*/ 1571 0, /*tp_compare*/ 1572 0, /*tp_repr*/ 1573 0, /*tp_as_number*/ 1574 0, /*tp_as_sequence*/ 1575 0, /*tp_as_mapping*/ 1576 0, /*tp_hash*/ 1577 0, /*tp_call*/ 1578 0, /*tp_str*/ 1579 PyObject_GenericGetAttr,/*tp_getattro*/ 1580 PyObject_GenericSetAttr,/*tp_setattro*/ 1581 0, /*tp_as_buffer*/ 1582 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ 1583 BZ2File__doc__, /*tp_doc*/ 1584 0, /*tp_traverse*/ 1585 0, /*tp_clear*/ 1586 0, /*tp_richcompare*/ 1587 0, /*tp_weaklistoffset*/ 1588 (getiterfunc)BZ2File_getiter, /*tp_iter*/ 1589 (iternextfunc)BZ2File_iternext, /*tp_iternext*/ 1590 BZ2File_methods, /*tp_methods*/ 1591 BZ2File_members, /*tp_members*/ 1592 BZ2File_getset, /*tp_getset*/ 1593 0, /*tp_base*/ 1594 0, /*tp_dict*/ 1595 0, /*tp_descr_get*/ 1596 0, /*tp_descr_set*/ 1597 0, /*tp_dictoffset*/ 1598 (initproc)BZ2File_init, /*tp_init*/ 1599 PyType_GenericAlloc, /*tp_alloc*/ 1600 PyType_GenericNew, /*tp_new*/ 1601 _PyObject_Del, /*tp_free*/ 1602 0, /*tp_is_gc*/ 1603 }; 1604 1605 1606 /* ===================================================================== */ 1607 /* Methods of BZ2Comp. */ 1608 1609 PyDoc_STRVAR(BZ2Comp_compress__doc__, 1610 "compress(data) -> string\n\ 1611 \n\ 1612 Provide more data to the compressor object. It will return chunks of\n\ 1613 compressed data whenever possible. When you've finished providing data\n\ 1614 to compress, call the flush() method to finish the compression process,\n\ 1615 and return what is left in the internal buffers.\n\ 1616 "); 1617 1618 static PyObject * 1619 BZ2Comp_compress(BZ2CompObject *self, PyObject *args) 1620 { 1621 Py_buffer pdata; 1622 char *data; 1623 int datasize; 1624 int bufsize = SMALLCHUNK; 1625 PY_LONG_LONG totalout; 1626 PyObject *ret = NULL; 1627 bz_stream *bzs = &self->bzs; 1628 int bzerror; 1629 1630 if (!PyArg_ParseTuple(args, "s*:compress", &pdata)) 1631 return NULL; 1632 data = pdata.buf; 1633 datasize = pdata.len; 1634 1635 if (datasize == 0) { 1636 PyBuffer_Release(&pdata); 1637 return PyString_FromString(""); 1638 } 1639 1640 ACQUIRE_LOCK(self); 1641 if (!self->running) { 1642 PyErr_SetString(PyExc_ValueError, 1643 "this object was already flushed"); 1644 goto error; 1645 } 1646 1647 ret = PyString_FromStringAndSize(NULL, bufsize); 1648 if (!ret) 1649 goto error; 1650 1651 bzs->next_in = data; 1652 bzs->avail_in = datasize; 1653 bzs->next_out = BUF(ret); 1654 bzs->avail_out = bufsize; 1655 1656 totalout = BZS_TOTAL_OUT(bzs); 1657 1658 for (;;) { 1659 Py_BEGIN_ALLOW_THREADS 1660 bzerror = BZ2_bzCompress(bzs, BZ_RUN); 1661 Py_END_ALLOW_THREADS 1662 if (bzerror != BZ_RUN_OK) { 1663 Util_CatchBZ2Error(bzerror); 1664 goto error; 1665 } 1666 if (bzs->avail_in == 0) 1667 break; /* no more input data */ 1668 if (bzs->avail_out == 0) { 1669 bufsize = Util_NewBufferSize(bufsize); 1670 if (_PyString_Resize(&ret, bufsize) < 0) { 1671 BZ2_bzCompressEnd(bzs); 1672 goto error; 1673 } 1674 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) 1675 - totalout); 1676 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 1677 } 1678 } 1679 1680 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); 1681 1682 RELEASE_LOCK(self); 1683 PyBuffer_Release(&pdata); 1684 return ret; 1685 1686 error: 1687 RELEASE_LOCK(self); 1688 PyBuffer_Release(&pdata); 1689 Py_XDECREF(ret); 1690 return NULL; 1691 } 1692 1693 PyDoc_STRVAR(BZ2Comp_flush__doc__, 1694 "flush() -> string\n\ 1695 \n\ 1696 Finish the compression process and return what is left in internal buffers.\n\ 1697 You must not use the compressor object after calling this method.\n\ 1698 "); 1699 1700 static PyObject * 1701 BZ2Comp_flush(BZ2CompObject *self) 1702 { 1703 int bufsize = SMALLCHUNK; 1704 PyObject *ret = NULL; 1705 bz_stream *bzs = &self->bzs; 1706 PY_LONG_LONG totalout; 1707 int bzerror; 1708 1709 ACQUIRE_LOCK(self); 1710 if (!self->running) { 1711 PyErr_SetString(PyExc_ValueError, "object was already " 1712 "flushed"); 1713 goto error; 1714 } 1715 self->running = 0; 1716 1717 ret = PyString_FromStringAndSize(NULL, bufsize); 1718 if (!ret) 1719 goto error; 1720 1721 bzs->next_out = BUF(ret); 1722 bzs->avail_out = bufsize; 1723 1724 totalout = BZS_TOTAL_OUT(bzs); 1725 1726 for (;;) { 1727 Py_BEGIN_ALLOW_THREADS 1728 bzerror = BZ2_bzCompress(bzs, BZ_FINISH); 1729 Py_END_ALLOW_THREADS 1730 if (bzerror == BZ_STREAM_END) { 1731 break; 1732 } else if (bzerror != BZ_FINISH_OK) { 1733 Util_CatchBZ2Error(bzerror); 1734 goto error; 1735 } 1736 if (bzs->avail_out == 0) { 1737 bufsize = Util_NewBufferSize(bufsize); 1738 if (_PyString_Resize(&ret, bufsize) < 0) 1739 goto error; 1740 bzs->next_out = BUF(ret); 1741 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) 1742 - totalout); 1743 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 1744 } 1745 } 1746 1747 if (bzs->avail_out != 0) 1748 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); 1749 1750 RELEASE_LOCK(self); 1751 return ret; 1752 1753 error: 1754 RELEASE_LOCK(self); 1755 Py_XDECREF(ret); 1756 return NULL; 1757 } 1758 1759 static PyMethodDef BZ2Comp_methods[] = { 1760 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS, 1761 BZ2Comp_compress__doc__}, 1762 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS, 1763 BZ2Comp_flush__doc__}, 1764 {NULL, NULL} /* sentinel */ 1765 }; 1766 1767 1768 /* ===================================================================== */ 1769 /* Slot definitions for BZ2Comp_Type. */ 1770 1771 static int 1772 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs) 1773 { 1774 int compresslevel = 9; 1775 int bzerror; 1776 static char *kwlist[] = {"compresslevel", 0}; 1777 1778 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor", 1779 kwlist, &compresslevel)) 1780 return -1; 1781 1782 if (compresslevel < 1 || compresslevel > 9) { 1783 PyErr_SetString(PyExc_ValueError, 1784 "compresslevel must be between 1 and 9"); 1785 goto error; 1786 } 1787 1788 #ifdef WITH_THREAD 1789 self->lock = PyThread_allocate_lock(); 1790 if (!self->lock) { 1791 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); 1792 goto error; 1793 } 1794 #endif 1795 1796 memset(&self->bzs, 0, sizeof(bz_stream)); 1797 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); 1798 if (bzerror != BZ_OK) { 1799 Util_CatchBZ2Error(bzerror); 1800 goto error; 1801 } 1802 1803 self->running = 1; 1804 1805 return 0; 1806 error: 1807 #ifdef WITH_THREAD 1808 if (self->lock) { 1809 PyThread_free_lock(self->lock); 1810 self->lock = NULL; 1811 } 1812 #endif 1813 return -1; 1814 } 1815 1816 static void 1817 BZ2Comp_dealloc(BZ2CompObject *self) 1818 { 1819 #ifdef WITH_THREAD 1820 if (self->lock) 1821 PyThread_free_lock(self->lock); 1822 #endif 1823 BZ2_bzCompressEnd(&self->bzs); 1824 Py_TYPE(self)->tp_free((PyObject *)self); 1825 } 1826 1827 1828 /* ===================================================================== */ 1829 /* BZ2Comp_Type definition. */ 1830 1831 PyDoc_STRVAR(BZ2Comp__doc__, 1832 "BZ2Compressor([compresslevel=9]) -> compressor object\n\ 1833 \n\ 1834 Create a new compressor object. This object may be used to compress\n\ 1835 data sequentially. If you want to compress data in one shot, use the\n\ 1836 compress() function instead. The compresslevel parameter, if given,\n\ 1837 must be a number between 1 and 9.\n\ 1838 "); 1839 1840 static PyTypeObject BZ2Comp_Type = { 1841 PyVarObject_HEAD_INIT(NULL, 0) 1842 "bz2.BZ2Compressor", /*tp_name*/ 1843 sizeof(BZ2CompObject), /*tp_basicsize*/ 1844 0, /*tp_itemsize*/ 1845 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/ 1846 0, /*tp_print*/ 1847 0, /*tp_getattr*/ 1848 0, /*tp_setattr*/ 1849 0, /*tp_compare*/ 1850 0, /*tp_repr*/ 1851 0, /*tp_as_number*/ 1852 0, /*tp_as_sequence*/ 1853 0, /*tp_as_mapping*/ 1854 0, /*tp_hash*/ 1855 0, /*tp_call*/ 1856 0, /*tp_str*/ 1857 PyObject_GenericGetAttr,/*tp_getattro*/ 1858 PyObject_GenericSetAttr,/*tp_setattro*/ 1859 0, /*tp_as_buffer*/ 1860 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ 1861 BZ2Comp__doc__, /*tp_doc*/ 1862 0, /*tp_traverse*/ 1863 0, /*tp_clear*/ 1864 0, /*tp_richcompare*/ 1865 0, /*tp_weaklistoffset*/ 1866 0, /*tp_iter*/ 1867 0, /*tp_iternext*/ 1868 BZ2Comp_methods, /*tp_methods*/ 1869 0, /*tp_members*/ 1870 0, /*tp_getset*/ 1871 0, /*tp_base*/ 1872 0, /*tp_dict*/ 1873 0, /*tp_descr_get*/ 1874 0, /*tp_descr_set*/ 1875 0, /*tp_dictoffset*/ 1876 (initproc)BZ2Comp_init, /*tp_init*/ 1877 PyType_GenericAlloc, /*tp_alloc*/ 1878 PyType_GenericNew, /*tp_new*/ 1879 _PyObject_Del, /*tp_free*/ 1880 0, /*tp_is_gc*/ 1881 }; 1882 1883 1884 /* ===================================================================== */ 1885 /* Members of BZ2Decomp. */ 1886 1887 #undef OFF 1888 #define OFF(x) offsetof(BZ2DecompObject, x) 1889 1890 static PyMemberDef BZ2Decomp_members[] = { 1891 {"unused_data", T_OBJECT, OFF(unused_data), RO}, 1892 {NULL} /* Sentinel */ 1893 }; 1894 1895 1896 /* ===================================================================== */ 1897 /* Methods of BZ2Decomp. */ 1898 1899 PyDoc_STRVAR(BZ2Decomp_decompress__doc__, 1900 "decompress(data) -> string\n\ 1901 \n\ 1902 Provide more data to the decompressor object. It will return chunks\n\ 1903 of decompressed data whenever possible. If you try to decompress data\n\ 1904 after the end of stream is found, EOFError will be raised. If any data\n\ 1905 was found after the end of stream, it'll be ignored and saved in\n\ 1906 unused_data attribute.\n\ 1907 "); 1908 1909 static PyObject * 1910 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args) 1911 { 1912 Py_buffer pdata; 1913 char *data; 1914 int datasize; 1915 int bufsize = SMALLCHUNK; 1916 PY_LONG_LONG totalout; 1917 PyObject *ret = NULL; 1918 bz_stream *bzs = &self->bzs; 1919 int bzerror; 1920 1921 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) 1922 return NULL; 1923 data = pdata.buf; 1924 datasize = pdata.len; 1925 1926 ACQUIRE_LOCK(self); 1927 if (!self->running) { 1928 PyErr_SetString(PyExc_EOFError, "end of stream was " 1929 "already found"); 1930 goto error; 1931 } 1932 1933 ret = PyString_FromStringAndSize(NULL, bufsize); 1934 if (!ret) 1935 goto error; 1936 1937 bzs->next_in = data; 1938 bzs->avail_in = datasize; 1939 bzs->next_out = BUF(ret); 1940 bzs->avail_out = bufsize; 1941 1942 totalout = BZS_TOTAL_OUT(bzs); 1943 1944 for (;;) { 1945 Py_BEGIN_ALLOW_THREADS 1946 bzerror = BZ2_bzDecompress(bzs); 1947 Py_END_ALLOW_THREADS 1948 if (bzerror == BZ_STREAM_END) { 1949 if (bzs->avail_in != 0) { 1950 Py_DECREF(self->unused_data); 1951 self->unused_data = 1952 PyString_FromStringAndSize(bzs->next_in, 1953 bzs->avail_in); 1954 } 1955 self->running = 0; 1956 break; 1957 } 1958 if (bzerror != BZ_OK) { 1959 Util_CatchBZ2Error(bzerror); 1960 goto error; 1961 } 1962 if (bzs->avail_in == 0) 1963 break; /* no more input data */ 1964 if (bzs->avail_out == 0) { 1965 bufsize = Util_NewBufferSize(bufsize); 1966 if (_PyString_Resize(&ret, bufsize) < 0) { 1967 BZ2_bzDecompressEnd(bzs); 1968 goto error; 1969 } 1970 bzs->next_out = BUF(ret); 1971 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) 1972 - totalout); 1973 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 1974 } 1975 } 1976 1977 if (bzs->avail_out != 0) 1978 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); 1979 1980 RELEASE_LOCK(self); 1981 PyBuffer_Release(&pdata); 1982 return ret; 1983 1984 error: 1985 RELEASE_LOCK(self); 1986 PyBuffer_Release(&pdata); 1987 Py_XDECREF(ret); 1988 return NULL; 1989 } 1990 1991 static PyMethodDef BZ2Decomp_methods[] = { 1992 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__}, 1993 {NULL, NULL} /* sentinel */ 1994 }; 1995 1996 1997 /* ===================================================================== */ 1998 /* Slot definitions for BZ2Decomp_Type. */ 1999 2000 static int 2001 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs) 2002 { 2003 int bzerror; 2004 2005 if (!PyArg_ParseTuple(args, ":BZ2Decompressor")) 2006 return -1; 2007 2008 #ifdef WITH_THREAD 2009 self->lock = PyThread_allocate_lock(); 2010 if (!self->lock) { 2011 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); 2012 goto error; 2013 } 2014 #endif 2015 2016 self->unused_data = PyString_FromString(""); 2017 if (!self->unused_data) 2018 goto error; 2019 2020 memset(&self->bzs, 0, sizeof(bz_stream)); 2021 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); 2022 if (bzerror != BZ_OK) { 2023 Util_CatchBZ2Error(bzerror); 2024 goto error; 2025 } 2026 2027 self->running = 1; 2028 2029 return 0; 2030 2031 error: 2032 #ifdef WITH_THREAD 2033 if (self->lock) { 2034 PyThread_free_lock(self->lock); 2035 self->lock = NULL; 2036 } 2037 #endif 2038 Py_CLEAR(self->unused_data); 2039 return -1; 2040 } 2041 2042 static void 2043 BZ2Decomp_dealloc(BZ2DecompObject *self) 2044 { 2045 #ifdef WITH_THREAD 2046 if (self->lock) 2047 PyThread_free_lock(self->lock); 2048 #endif 2049 Py_XDECREF(self->unused_data); 2050 BZ2_bzDecompressEnd(&self->bzs); 2051 Py_TYPE(self)->tp_free((PyObject *)self); 2052 } 2053 2054 2055 /* ===================================================================== */ 2056 /* BZ2Decomp_Type definition. */ 2057 2058 PyDoc_STRVAR(BZ2Decomp__doc__, 2059 "BZ2Decompressor() -> decompressor object\n\ 2060 \n\ 2061 Create a new decompressor object. This object may be used to decompress\n\ 2062 data sequentially. If you want to decompress data in one shot, use the\n\ 2063 decompress() function instead.\n\ 2064 "); 2065 2066 static PyTypeObject BZ2Decomp_Type = { 2067 PyVarObject_HEAD_INIT(NULL, 0) 2068 "bz2.BZ2Decompressor", /*tp_name*/ 2069 sizeof(BZ2DecompObject), /*tp_basicsize*/ 2070 0, /*tp_itemsize*/ 2071 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/ 2072 0, /*tp_print*/ 2073 0, /*tp_getattr*/ 2074 0, /*tp_setattr*/ 2075 0, /*tp_compare*/ 2076 0, /*tp_repr*/ 2077 0, /*tp_as_number*/ 2078 0, /*tp_as_sequence*/ 2079 0, /*tp_as_mapping*/ 2080 0, /*tp_hash*/ 2081 0, /*tp_call*/ 2082 0, /*tp_str*/ 2083 PyObject_GenericGetAttr,/*tp_getattro*/ 2084 PyObject_GenericSetAttr,/*tp_setattro*/ 2085 0, /*tp_as_buffer*/ 2086 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ 2087 BZ2Decomp__doc__, /*tp_doc*/ 2088 0, /*tp_traverse*/ 2089 0, /*tp_clear*/ 2090 0, /*tp_richcompare*/ 2091 0, /*tp_weaklistoffset*/ 2092 0, /*tp_iter*/ 2093 0, /*tp_iternext*/ 2094 BZ2Decomp_methods, /*tp_methods*/ 2095 BZ2Decomp_members, /*tp_members*/ 2096 0, /*tp_getset*/ 2097 0, /*tp_base*/ 2098 0, /*tp_dict*/ 2099 0, /*tp_descr_get*/ 2100 0, /*tp_descr_set*/ 2101 0, /*tp_dictoffset*/ 2102 (initproc)BZ2Decomp_init, /*tp_init*/ 2103 PyType_GenericAlloc, /*tp_alloc*/ 2104 PyType_GenericNew, /*tp_new*/ 2105 _PyObject_Del, /*tp_free*/ 2106 0, /*tp_is_gc*/ 2107 }; 2108 2109 2110 /* ===================================================================== */ 2111 /* Module functions. */ 2112 2113 PyDoc_STRVAR(bz2_compress__doc__, 2114 "compress(data [, compresslevel=9]) -> string\n\ 2115 \n\ 2116 Compress data in one shot. If you want to compress data sequentially,\n\ 2117 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\ 2118 given, must be a number between 1 and 9.\n\ 2119 "); 2120 2121 static PyObject * 2122 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs) 2123 { 2124 int compresslevel=9; 2125 Py_buffer pdata; 2126 char *data; 2127 int datasize; 2128 int bufsize; 2129 PyObject *ret = NULL; 2130 bz_stream _bzs; 2131 bz_stream *bzs = &_bzs; 2132 int bzerror; 2133 static char *kwlist[] = {"data", "compresslevel", 0}; 2134 2135 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", 2136 kwlist, &pdata, 2137 &compresslevel)) 2138 return NULL; 2139 data = pdata.buf; 2140 datasize = pdata.len; 2141 2142 if (compresslevel < 1 || compresslevel > 9) { 2143 PyErr_SetString(PyExc_ValueError, 2144 "compresslevel must be between 1 and 9"); 2145 PyBuffer_Release(&pdata); 2146 return NULL; 2147 } 2148 2149 /* Conforming to bz2 manual, this is large enough to fit compressed 2150 * data in one shot. We will check it later anyway. */ 2151 bufsize = datasize + (datasize/100+1) + 600; 2152 2153 ret = PyString_FromStringAndSize(NULL, bufsize); 2154 if (!ret) { 2155 PyBuffer_Release(&pdata); 2156 return NULL; 2157 } 2158 2159 memset(bzs, 0, sizeof(bz_stream)); 2160 2161 bzs->next_in = data; 2162 bzs->avail_in = datasize; 2163 bzs->next_out = BUF(ret); 2164 bzs->avail_out = bufsize; 2165 2166 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0); 2167 if (bzerror != BZ_OK) { 2168 Util_CatchBZ2Error(bzerror); 2169 PyBuffer_Release(&pdata); 2170 Py_DECREF(ret); 2171 return NULL; 2172 } 2173 2174 for (;;) { 2175 Py_BEGIN_ALLOW_THREADS 2176 bzerror = BZ2_bzCompress(bzs, BZ_FINISH); 2177 Py_END_ALLOW_THREADS 2178 if (bzerror == BZ_STREAM_END) { 2179 break; 2180 } else if (bzerror != BZ_FINISH_OK) { 2181 BZ2_bzCompressEnd(bzs); 2182 Util_CatchBZ2Error(bzerror); 2183 PyBuffer_Release(&pdata); 2184 Py_DECREF(ret); 2185 return NULL; 2186 } 2187 if (bzs->avail_out == 0) { 2188 bufsize = Util_NewBufferSize(bufsize); 2189 if (_PyString_Resize(&ret, bufsize) < 0) { 2190 BZ2_bzCompressEnd(bzs); 2191 PyBuffer_Release(&pdata); 2192 Py_DECREF(ret); 2193 return NULL; 2194 } 2195 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs); 2196 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 2197 } 2198 } 2199 2200 if (bzs->avail_out != 0) 2201 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)); 2202 BZ2_bzCompressEnd(bzs); 2203 2204 PyBuffer_Release(&pdata); 2205 return ret; 2206 } 2207 2208 PyDoc_STRVAR(bz2_decompress__doc__, 2209 "decompress(data) -> decompressed data\n\ 2210 \n\ 2211 Decompress data in one shot. If you want to decompress data sequentially,\n\ 2212 use an instance of BZ2Decompressor instead.\n\ 2213 "); 2214 2215 static PyObject * 2216 bz2_decompress(PyObject *self, PyObject *args) 2217 { 2218 Py_buffer pdata; 2219 char *data; 2220 int datasize; 2221 int bufsize = SMALLCHUNK; 2222 PyObject *ret; 2223 bz_stream _bzs; 2224 bz_stream *bzs = &_bzs; 2225 int bzerror; 2226 2227 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) 2228 return NULL; 2229 data = pdata.buf; 2230 datasize = pdata.len; 2231 2232 if (datasize == 0) { 2233 PyBuffer_Release(&pdata); 2234 return PyString_FromString(""); 2235 } 2236 2237 ret = PyString_FromStringAndSize(NULL, bufsize); 2238 if (!ret) { 2239 PyBuffer_Release(&pdata); 2240 return NULL; 2241 } 2242 2243 memset(bzs, 0, sizeof(bz_stream)); 2244 2245 bzs->next_in = data; 2246 bzs->avail_in = datasize; 2247 bzs->next_out = BUF(ret); 2248 bzs->avail_out = bufsize; 2249 2250 bzerror = BZ2_bzDecompressInit(bzs, 0, 0); 2251 if (bzerror != BZ_OK) { 2252 Util_CatchBZ2Error(bzerror); 2253 Py_DECREF(ret); 2254 PyBuffer_Release(&pdata); 2255 return NULL; 2256 } 2257 2258 for (;;) { 2259 Py_BEGIN_ALLOW_THREADS 2260 bzerror = BZ2_bzDecompress(bzs); 2261 Py_END_ALLOW_THREADS 2262 if (bzerror == BZ_STREAM_END) { 2263 break; 2264 } else if (bzerror != BZ_OK) { 2265 BZ2_bzDecompressEnd(bzs); 2266 Util_CatchBZ2Error(bzerror); 2267 PyBuffer_Release(&pdata); 2268 Py_DECREF(ret); 2269 return NULL; 2270 } 2271 if (bzs->avail_in == 0) { 2272 BZ2_bzDecompressEnd(bzs); 2273 PyErr_SetString(PyExc_ValueError, 2274 "couldn't find end of stream"); 2275 PyBuffer_Release(&pdata); 2276 Py_DECREF(ret); 2277 return NULL; 2278 } 2279 if (bzs->avail_out == 0) { 2280 bufsize = Util_NewBufferSize(bufsize); 2281 if (_PyString_Resize(&ret, bufsize) < 0) { 2282 BZ2_bzDecompressEnd(bzs); 2283 PyBuffer_Release(&pdata); 2284 Py_DECREF(ret); 2285 return NULL; 2286 } 2287 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs); 2288 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 2289 } 2290 } 2291 2292 if (bzs->avail_out != 0) 2293 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)); 2294 BZ2_bzDecompressEnd(bzs); 2295 PyBuffer_Release(&pdata); 2296 2297 return ret; 2298 } 2299 2300 static PyMethodDef bz2_methods[] = { 2301 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS, 2302 bz2_compress__doc__}, 2303 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS, 2304 bz2_decompress__doc__}, 2305 {NULL, NULL} /* sentinel */ 2306 }; 2307 2308 /* ===================================================================== */ 2309 /* Initialization function. */ 2310 2311 PyDoc_STRVAR(bz2__doc__, 2312 "The python bz2 module provides a comprehensive interface for\n\ 2313 the bz2 compression library. It implements a complete file\n\ 2314 interface, one shot (de)compression functions, and types for\n\ 2315 sequential (de)compression.\n\ 2316 "); 2317 2318 PyMODINIT_FUNC 2319 initbz2(void) 2320 { 2321 PyObject *m; 2322 2323 if (PyType_Ready(&BZ2File_Type) < 0) 2324 return; 2325 if (PyType_Ready(&BZ2Comp_Type) < 0) 2326 return; 2327 if (PyType_Ready(&BZ2Decomp_Type) < 0) 2328 return; 2329 2330 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__); 2331 if (m == NULL) 2332 return; 2333 2334 PyModule_AddObject(m, "__author__", PyString_FromString(__author__)); 2335 2336 Py_INCREF(&BZ2File_Type); 2337 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type); 2338 2339 Py_INCREF(&BZ2Comp_Type); 2340 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type); 2341 2342 Py_INCREF(&BZ2Decomp_Type); 2343 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type); 2344 } 2345