Home | History | Annotate | Download | only in Modules
      1 /*
      2 
      3 python-bz2 - python bz2 library interface
      4 
      5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer (at) conectiva.com>
      6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
      7 
      8 */
      9 
     10 #include "Python.h"
     11 #include <stdio.h>
     12 #include <bzlib.h>
     13 #include "structmember.h"
     14 
     15 #ifdef WITH_THREAD
     16 #include "pythread.h"
     17 #endif
     18 
     19 static char __author__[] =
     20 "The bz2 python module was written by:\n\
     21 \n\
     22     Gustavo Niemeyer <niemeyer (at) conectiva.com>\n\
     23 ";
     24 
     25 /* Our very own off_t-like type, 64-bit if possible */
     26 /* copied from Objects/fileobject.c */
     27 #if !defined(HAVE_LARGEFILE_SUPPORT)
     28 typedef off_t Py_off_t;
     29 #elif SIZEOF_OFF_T >= 8
     30 typedef off_t Py_off_t;
     31 #elif SIZEOF_FPOS_T >= 8
     32 typedef fpos_t Py_off_t;
     33 #else
     34 #error "Large file support, but neither off_t nor fpos_t is large enough."
     35 #endif
     36 
     37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
     38 
     39 #define MODE_CLOSED   0
     40 #define MODE_READ     1
     41 #define MODE_READ_EOF 2
     42 #define MODE_WRITE    3
     43 
     44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
     45 
     46 
     47 #ifdef BZ_CONFIG_ERROR
     48 
     49 #if SIZEOF_LONG >= 8
     50 #define BZS_TOTAL_OUT(bzs) \
     51     (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
     52 #elif SIZEOF_LONG_LONG >= 8
     53 #define BZS_TOTAL_OUT(bzs) \
     54     (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
     55 #else
     56 #define BZS_TOTAL_OUT(bzs) \
     57     bzs->total_out_lo32
     58 #endif
     59 
     60 #else /* ! BZ_CONFIG_ERROR */
     61 
     62 #define BZ2_bzRead bzRead
     63 #define BZ2_bzReadOpen bzReadOpen
     64 #define BZ2_bzReadClose bzReadClose
     65 #define BZ2_bzWrite bzWrite
     66 #define BZ2_bzWriteOpen bzWriteOpen
     67 #define BZ2_bzWriteClose bzWriteClose
     68 #define BZ2_bzCompress bzCompress
     69 #define BZ2_bzCompressInit bzCompressInit
     70 #define BZ2_bzCompressEnd bzCompressEnd
     71 #define BZ2_bzDecompress bzDecompress
     72 #define BZ2_bzDecompressInit bzDecompressInit
     73 #define BZ2_bzDecompressEnd bzDecompressEnd
     74 
     75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
     76 
     77 #endif /* ! BZ_CONFIG_ERROR */
     78 
     79 
     80 #ifdef WITH_THREAD
     81 #define ACQUIRE_LOCK(obj) do { \
     82     if (!PyThread_acquire_lock(obj->lock, 0)) { \
     83         Py_BEGIN_ALLOW_THREADS \
     84         PyThread_acquire_lock(obj->lock, 1); \
     85         Py_END_ALLOW_THREADS \
     86     } } while(0)
     87 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
     88 #else
     89 #define ACQUIRE_LOCK(obj)
     90 #define RELEASE_LOCK(obj)
     91 #endif
     92 
     93 /* Bits in f_newlinetypes */
     94 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
     95 #define NEWLINE_CR 1            /* \r newline seen */
     96 #define NEWLINE_LF 2            /* \n newline seen */
     97 #define NEWLINE_CRLF 4          /* \r\n newline seen */
     98 
     99 /* ===================================================================== */
    100 /* Structure definitions. */
    101 
    102 typedef struct {
    103     PyObject_HEAD
    104     PyObject *file;
    105 
    106     char* f_buf;                /* Allocated readahead buffer */
    107     char* f_bufend;             /* Points after last occupied position */
    108     char* f_bufptr;             /* Current buffer position */
    109 
    110     int f_softspace;            /* Flag used by 'print' command */
    111 
    112     int f_univ_newline;         /* Handle any newline convention */
    113     int f_newlinetypes;         /* Types of newlines seen */
    114     int f_skipnextlf;           /* Skip next \n */
    115 
    116     BZFILE *fp;
    117     int mode;
    118     Py_off_t pos;
    119     Py_off_t size;
    120 #ifdef WITH_THREAD
    121     PyThread_type_lock lock;
    122 #endif
    123 } BZ2FileObject;
    124 
    125 typedef struct {
    126     PyObject_HEAD
    127     bz_stream bzs;
    128     int running;
    129 #ifdef WITH_THREAD
    130     PyThread_type_lock lock;
    131 #endif
    132 } BZ2CompObject;
    133 
    134 typedef struct {
    135     PyObject_HEAD
    136     bz_stream bzs;
    137     int running;
    138     PyObject *unused_data;
    139 #ifdef WITH_THREAD
    140     PyThread_type_lock lock;
    141 #endif
    142 } BZ2DecompObject;
    143 
    144 /* ===================================================================== */
    145 /* Utility functions. */
    146 
    147 /* Refuse regular I/O if there's data in the iteration-buffer.
    148  * Mixing them would cause data to arrive out of order, as the read*
    149  * methods don't use the iteration buffer. */
    150 static int
    151 check_iterbuffered(BZ2FileObject *f)
    152 {
    153     if (f->f_buf != NULL &&
    154         (f->f_bufend - f->f_bufptr) > 0 &&
    155         f->f_buf[0] != '\0') {
    156         PyErr_SetString(PyExc_ValueError,
    157             "Mixing iteration and read methods would lose data");
    158         return -1;
    159     }
    160     return 0;
    161 }
    162 
    163 static int
    164 Util_CatchBZ2Error(int bzerror)
    165 {
    166     int ret = 0;
    167     switch(bzerror) {
    168         case BZ_OK:
    169         case BZ_STREAM_END:
    170             break;
    171 
    172 #ifdef BZ_CONFIG_ERROR
    173         case BZ_CONFIG_ERROR:
    174             PyErr_SetString(PyExc_SystemError,
    175                             "the bz2 library was not compiled "
    176                             "correctly");
    177             ret = 1;
    178             break;
    179 #endif
    180 
    181         case BZ_PARAM_ERROR:
    182             PyErr_SetString(PyExc_ValueError,
    183                             "the bz2 library has received wrong "
    184                             "parameters");
    185             ret = 1;
    186             break;
    187 
    188         case BZ_MEM_ERROR:
    189             PyErr_NoMemory();
    190             ret = 1;
    191             break;
    192 
    193         case BZ_DATA_ERROR:
    194         case BZ_DATA_ERROR_MAGIC:
    195             PyErr_SetString(PyExc_IOError, "invalid data stream");
    196             ret = 1;
    197             break;
    198 
    199         case BZ_IO_ERROR:
    200             PyErr_SetString(PyExc_IOError, "unknown IO error");
    201             ret = 1;
    202             break;
    203 
    204         case BZ_UNEXPECTED_EOF:
    205             PyErr_SetString(PyExc_EOFError,
    206                             "compressed file ended before the "
    207                             "logical end-of-stream was detected");
    208             ret = 1;
    209             break;
    210 
    211         case BZ_SEQUENCE_ERROR:
    212             PyErr_SetString(PyExc_RuntimeError,
    213                             "wrong sequence of bz2 library "
    214                             "commands used");
    215             ret = 1;
    216             break;
    217     }
    218     return ret;
    219 }
    220 
    221 #if BUFSIZ < 8192
    222 #define SMALLCHUNK 8192
    223 #else
    224 #define SMALLCHUNK BUFSIZ
    225 #endif
    226 
    227 #if SIZEOF_INT < 4
    228 #define BIGCHUNK  (512 * 32)
    229 #else
    230 #define BIGCHUNK  (512 * 1024)
    231 #endif
    232 
    233 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
    234 static size_t
    235 Util_NewBufferSize(size_t currentsize)
    236 {
    237     if (currentsize > SMALLCHUNK) {
    238         /* Keep doubling until we reach BIGCHUNK;
    239            then keep adding BIGCHUNK. */
    240         if (currentsize <= BIGCHUNK)
    241             return currentsize + currentsize;
    242         else
    243             return currentsize + BIGCHUNK;
    244     }
    245     return currentsize + SMALLCHUNK;
    246 }
    247 
    248 /* This is a hacked version of Python's fileobject.c:get_line(). */
    249 static PyObject *
    250 Util_GetLine(BZ2FileObject *f, int n)
    251 {
    252     char c;
    253     char *buf, *end;
    254     size_t total_v_size;        /* total # of slots in buffer */
    255     size_t used_v_size;         /* # used slots in buffer */
    256     size_t increment;       /* amount to increment the buffer */
    257     PyObject *v;
    258     int bzerror;
    259     int bytes_read;
    260     int newlinetypes = f->f_newlinetypes;
    261     int skipnextlf = f->f_skipnextlf;
    262     int univ_newline = f->f_univ_newline;
    263 
    264     total_v_size = n > 0 ? n : 100;
    265     v = PyString_FromStringAndSize((char *)NULL, total_v_size);
    266     if (v == NULL)
    267         return NULL;
    268 
    269     buf = BUF(v);
    270     end = buf + total_v_size;
    271 
    272     for (;;) {
    273         Py_BEGIN_ALLOW_THREADS
    274         while (buf != end) {
    275             bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
    276             f->pos++;
    277             if (bytes_read == 0) break;
    278             if (univ_newline) {
    279                 if (skipnextlf) {
    280                     skipnextlf = 0;
    281                     if (c == '\n') {
    282                         /* Seeing a \n here with skipnextlf true means we
    283                          * saw a \r before.
    284                          */
    285                         newlinetypes |= NEWLINE_CRLF;
    286                         if (bzerror != BZ_OK) break;
    287                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
    288                         f->pos++;
    289                         if (bytes_read == 0) break;
    290                     } else {
    291                         newlinetypes |= NEWLINE_CR;
    292                     }
    293                 }
    294                 if (c == '\r') {
    295                     skipnextlf = 1;
    296                     c = '\n';
    297                 } else if (c == '\n')
    298                     newlinetypes |= NEWLINE_LF;
    299             }
    300             *buf++ = c;
    301             if (bzerror != BZ_OK || c == '\n') break;
    302         }
    303         if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
    304             newlinetypes |= NEWLINE_CR;
    305         Py_END_ALLOW_THREADS
    306         f->f_newlinetypes = newlinetypes;
    307         f->f_skipnextlf = skipnextlf;
    308         if (bzerror == BZ_STREAM_END) {
    309             f->size = f->pos;
    310             f->mode = MODE_READ_EOF;
    311             break;
    312         } else if (bzerror != BZ_OK) {
    313             Util_CatchBZ2Error(bzerror);
    314             Py_DECREF(v);
    315             return NULL;
    316         }
    317         if (c == '\n')
    318             break;
    319         /* Must be because buf == end */
    320         if (n > 0)
    321             break;
    322         used_v_size = total_v_size;
    323         increment = total_v_size >> 2; /* mild exponential growth */
    324         total_v_size += increment;
    325         if (total_v_size > INT_MAX) {
    326             PyErr_SetString(PyExc_OverflowError,
    327                 "line is longer than a Python string can hold");
    328             Py_DECREF(v);
    329             return NULL;
    330         }
    331         if (_PyString_Resize(&v, total_v_size) < 0)
    332             return NULL;
    333         buf = BUF(v) + used_v_size;
    334         end = BUF(v) + total_v_size;
    335     }
    336 
    337     used_v_size = buf - BUF(v);
    338     if (used_v_size != total_v_size)
    339         _PyString_Resize(&v, used_v_size);
    340     return v;
    341 }
    342 
    343 /* This is a hacked version of Python's
    344  * fileobject.c:Py_UniversalNewlineFread(). */
    345 size_t
    346 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
    347                      char* buf, size_t n, BZ2FileObject *f)
    348 {
    349     char *dst = buf;
    350     int newlinetypes, skipnextlf;
    351 
    352     assert(buf != NULL);
    353     assert(stream != NULL);
    354 
    355     if (!f->f_univ_newline)
    356         return BZ2_bzRead(bzerror, stream, buf, n);
    357 
    358     newlinetypes = f->f_newlinetypes;
    359     skipnextlf = f->f_skipnextlf;
    360 
    361     /* Invariant:  n is the number of bytes remaining to be filled
    362      * in the buffer.
    363      */
    364     while (n) {
    365         size_t nread;
    366         int shortread;
    367         char *src = dst;
    368 
    369         nread = BZ2_bzRead(bzerror, stream, dst, n);
    370         assert(nread <= n);
    371         n -= nread; /* assuming 1 byte out for each in; will adjust */
    372         shortread = n != 0;             /* true iff EOF or error */
    373         while (nread--) {
    374             char c = *src++;
    375             if (c == '\r') {
    376                 /* Save as LF and set flag to skip next LF. */
    377                 *dst++ = '\n';
    378                 skipnextlf = 1;
    379             }
    380             else if (skipnextlf && c == '\n') {
    381                 /* Skip LF, and remember we saw CR LF. */
    382                 skipnextlf = 0;
    383                 newlinetypes |= NEWLINE_CRLF;
    384                 ++n;
    385             }
    386             else {
    387                 /* Normal char to be stored in buffer.  Also
    388                  * update the newlinetypes flag if either this
    389                  * is an LF or the previous char was a CR.
    390                  */
    391                 if (c == '\n')
    392                     newlinetypes |= NEWLINE_LF;
    393                 else if (skipnextlf)
    394                     newlinetypes |= NEWLINE_CR;
    395                 *dst++ = c;
    396                 skipnextlf = 0;
    397             }
    398         }
    399         if (shortread) {
    400             /* If this is EOF, update type flags. */
    401             if (skipnextlf && *bzerror == BZ_STREAM_END)
    402                 newlinetypes |= NEWLINE_CR;
    403             break;
    404         }
    405     }
    406     f->f_newlinetypes = newlinetypes;
    407     f->f_skipnextlf = skipnextlf;
    408     return dst - buf;
    409 }
    410 
    411 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
    412 static void
    413 Util_DropReadAhead(BZ2FileObject *f)
    414 {
    415     if (f->f_buf != NULL) {
    416         PyMem_Free(f->f_buf);
    417         f->f_buf = NULL;
    418     }
    419 }
    420 
    421 /* This is a hacked version of Python's fileobject.c:readahead(). */
    422 static int
    423 Util_ReadAhead(BZ2FileObject *f, int bufsize)
    424 {
    425     int chunksize;
    426     int bzerror;
    427 
    428     if (f->f_buf != NULL) {
    429         if((f->f_bufend - f->f_bufptr) >= 1)
    430             return 0;
    431         else
    432             Util_DropReadAhead(f);
    433     }
    434     if (f->mode == MODE_READ_EOF) {
    435         f->f_bufptr = f->f_buf;
    436         f->f_bufend = f->f_buf;
    437         return 0;
    438     }
    439     if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
    440         PyErr_NoMemory();
    441         return -1;
    442     }
    443     Py_BEGIN_ALLOW_THREADS
    444     chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
    445                                      bufsize, f);
    446     Py_END_ALLOW_THREADS
    447     f->pos += chunksize;
    448     if (bzerror == BZ_STREAM_END) {
    449         f->size = f->pos;
    450         f->mode = MODE_READ_EOF;
    451     } else if (bzerror != BZ_OK) {
    452         Util_CatchBZ2Error(bzerror);
    453         Util_DropReadAhead(f);
    454         return -1;
    455     }
    456     f->f_bufptr = f->f_buf;
    457     f->f_bufend = f->f_buf + chunksize;
    458     return 0;
    459 }
    460 
    461 /* This is a hacked version of Python's
    462  * fileobject.c:readahead_get_line_skip(). */
    463 static PyStringObject *
    464 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
    465 {
    466     PyStringObject* s;
    467     char *bufptr;
    468     char *buf;
    469     int len;
    470 
    471     if (f->f_buf == NULL)
    472         if (Util_ReadAhead(f, bufsize) < 0)
    473             return NULL;
    474 
    475     len = f->f_bufend - f->f_bufptr;
    476     if (len == 0)
    477         return (PyStringObject *)
    478             PyString_FromStringAndSize(NULL, skip);
    479     bufptr = memchr(f->f_bufptr, '\n', len);
    480     if (bufptr != NULL) {
    481         bufptr++;                               /* Count the '\n' */
    482         len = bufptr - f->f_bufptr;
    483         s = (PyStringObject *)
    484             PyString_FromStringAndSize(NULL, skip+len);
    485         if (s == NULL)
    486             return NULL;
    487         memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
    488         f->f_bufptr = bufptr;
    489         if (bufptr == f->f_bufend)
    490             Util_DropReadAhead(f);
    491     } else {
    492         bufptr = f->f_bufptr;
    493         buf = f->f_buf;
    494         f->f_buf = NULL;                /* Force new readahead buffer */
    495         s = Util_ReadAheadGetLineSkip(f, skip+len,
    496                                       bufsize + (bufsize>>2));
    497         if (s == NULL) {
    498             PyMem_Free(buf);
    499             return NULL;
    500         }
    501         memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
    502         PyMem_Free(buf);
    503     }
    504     return s;
    505 }
    506 
    507 /* ===================================================================== */
    508 /* Methods of BZ2File. */
    509 
    510 PyDoc_STRVAR(BZ2File_read__doc__,
    511 "read([size]) -> string\n\
    512 \n\
    513 Read at most size uncompressed bytes, returned as a string. If the size\n\
    514 argument is negative or omitted, read until EOF is reached.\n\
    515 ");
    516 
    517 /* This is a hacked version of Python's fileobject.c:file_read(). */
    518 static PyObject *
    519 BZ2File_read(BZ2FileObject *self, PyObject *args)
    520 {
    521     long bytesrequested = -1;
    522     size_t bytesread, buffersize, chunksize;
    523     int bzerror;
    524     PyObject *ret = NULL;
    525 
    526     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
    527         return NULL;
    528 
    529     ACQUIRE_LOCK(self);
    530     switch (self->mode) {
    531         case MODE_READ:
    532             break;
    533         case MODE_READ_EOF:
    534             ret = PyString_FromString("");
    535             goto cleanup;
    536         case MODE_CLOSED:
    537             PyErr_SetString(PyExc_ValueError,
    538                             "I/O operation on closed file");
    539             goto cleanup;
    540         default:
    541             PyErr_SetString(PyExc_IOError,
    542                             "file is not ready for reading");
    543             goto cleanup;
    544     }
    545 
    546     /* refuse to mix with f.next() */
    547     if (check_iterbuffered(self))
    548         goto cleanup;
    549 
    550     if (bytesrequested < 0)
    551         buffersize = Util_NewBufferSize((size_t)0);
    552     else
    553         buffersize = bytesrequested;
    554     if (buffersize > INT_MAX) {
    555         PyErr_SetString(PyExc_OverflowError,
    556                         "requested number of bytes is "
    557                         "more than a Python string can hold");
    558         goto cleanup;
    559     }
    560     ret = PyString_FromStringAndSize((char *)NULL, buffersize);
    561     if (ret == NULL)
    562         goto cleanup;
    563     bytesread = 0;
    564 
    565     for (;;) {
    566         Py_BEGIN_ALLOW_THREADS
    567         chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
    568                                          BUF(ret)+bytesread,
    569                                          buffersize-bytesread,
    570                                          self);
    571         self->pos += chunksize;
    572         Py_END_ALLOW_THREADS
    573         bytesread += chunksize;
    574         if (bzerror == BZ_STREAM_END) {
    575             self->size = self->pos;
    576             self->mode = MODE_READ_EOF;
    577             break;
    578         } else if (bzerror != BZ_OK) {
    579             Util_CatchBZ2Error(bzerror);
    580             Py_DECREF(ret);
    581             ret = NULL;
    582             goto cleanup;
    583         }
    584         if (bytesrequested < 0) {
    585             buffersize = Util_NewBufferSize(buffersize);
    586             if (_PyString_Resize(&ret, buffersize) < 0)
    587                 goto cleanup;
    588         } else {
    589             break;
    590         }
    591     }
    592     if (bytesread != buffersize)
    593         _PyString_Resize(&ret, bytesread);
    594 
    595 cleanup:
    596     RELEASE_LOCK(self);
    597     return ret;
    598 }
    599 
    600 PyDoc_STRVAR(BZ2File_readline__doc__,
    601 "readline([size]) -> string\n\
    602 \n\
    603 Return the next line from the file, as a string, retaining newline.\n\
    604 A non-negative size argument will limit the maximum number of bytes to\n\
    605 return (an incomplete line may be returned then). Return an empty\n\
    606 string at EOF.\n\
    607 ");
    608 
    609 static PyObject *
    610 BZ2File_readline(BZ2FileObject *self, PyObject *args)
    611 {
    612     PyObject *ret = NULL;
    613     int sizehint = -1;
    614 
    615     if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
    616         return NULL;
    617 
    618     ACQUIRE_LOCK(self);
    619     switch (self->mode) {
    620         case MODE_READ:
    621             break;
    622         case MODE_READ_EOF:
    623             ret = PyString_FromString("");
    624             goto cleanup;
    625         case MODE_CLOSED:
    626             PyErr_SetString(PyExc_ValueError,
    627                             "I/O operation on closed file");
    628             goto cleanup;
    629         default:
    630             PyErr_SetString(PyExc_IOError,
    631                             "file is not ready for reading");
    632             goto cleanup;
    633     }
    634 
    635     /* refuse to mix with f.next() */
    636     if (check_iterbuffered(self))
    637         goto cleanup;
    638 
    639     if (sizehint == 0)
    640         ret = PyString_FromString("");
    641     else
    642         ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
    643 
    644 cleanup:
    645     RELEASE_LOCK(self);
    646     return ret;
    647 }
    648 
    649 PyDoc_STRVAR(BZ2File_readlines__doc__,
    650 "readlines([size]) -> list\n\
    651 \n\
    652 Call readline() repeatedly and return a list of lines read.\n\
    653 The optional size argument, if given, is an approximate bound on the\n\
    654 total number of bytes in the lines returned.\n\
    655 ");
    656 
    657 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
    658 static PyObject *
    659 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
    660 {
    661     long sizehint = 0;
    662     PyObject *list = NULL;
    663     PyObject *line;
    664     char small_buffer[SMALLCHUNK];
    665     char *buffer = small_buffer;
    666     size_t buffersize = SMALLCHUNK;
    667     PyObject *big_buffer = NULL;
    668     size_t nfilled = 0;
    669     size_t nread;
    670     size_t totalread = 0;
    671     char *p, *q, *end;
    672     int err;
    673     int shortread = 0;
    674     int bzerror;
    675 
    676     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
    677         return NULL;
    678 
    679     ACQUIRE_LOCK(self);
    680     switch (self->mode) {
    681         case MODE_READ:
    682             break;
    683         case MODE_READ_EOF:
    684             list = PyList_New(0);
    685             goto cleanup;
    686         case MODE_CLOSED:
    687             PyErr_SetString(PyExc_ValueError,
    688                             "I/O operation on closed file");
    689             goto cleanup;
    690         default:
    691             PyErr_SetString(PyExc_IOError,
    692                             "file is not ready for reading");
    693             goto cleanup;
    694     }
    695 
    696     /* refuse to mix with f.next() */
    697     if (check_iterbuffered(self))
    698         goto cleanup;
    699 
    700     if ((list = PyList_New(0)) == NULL)
    701         goto cleanup;
    702 
    703     for (;;) {
    704         Py_BEGIN_ALLOW_THREADS
    705         nread = Util_UnivNewlineRead(&bzerror, self->fp,
    706                                      buffer+nfilled,
    707                                      buffersize-nfilled, self);
    708         self->pos += nread;
    709         Py_END_ALLOW_THREADS
    710         if (bzerror == BZ_STREAM_END) {
    711             self->size = self->pos;
    712             self->mode = MODE_READ_EOF;
    713             if (nread == 0) {
    714                 sizehint = 0;
    715                 break;
    716             }
    717             shortread = 1;
    718         } else if (bzerror != BZ_OK) {
    719             Util_CatchBZ2Error(bzerror);
    720           error:
    721             Py_DECREF(list);
    722             list = NULL;
    723             goto cleanup;
    724         }
    725         totalread += nread;
    726         p = memchr(buffer+nfilled, '\n', nread);
    727         if (!shortread && p == NULL) {
    728             /* Need a larger buffer to fit this line */
    729             nfilled += nread;
    730             buffersize *= 2;
    731             if (buffersize > INT_MAX) {
    732                 PyErr_SetString(PyExc_OverflowError,
    733                 "line is longer than a Python string can hold");
    734                 goto error;
    735             }
    736             if (big_buffer == NULL) {
    737                 /* Create the big buffer */
    738                 big_buffer = PyString_FromStringAndSize(
    739                     NULL, buffersize);
    740                 if (big_buffer == NULL)
    741                     goto error;
    742                 buffer = PyString_AS_STRING(big_buffer);
    743                 memcpy(buffer, small_buffer, nfilled);
    744             }
    745             else {
    746                 /* Grow the big buffer */
    747                 _PyString_Resize(&big_buffer, buffersize);
    748                 buffer = PyString_AS_STRING(big_buffer);
    749             }
    750             continue;
    751         }
    752         end = buffer+nfilled+nread;
    753         q = buffer;
    754         while (p != NULL) {
    755             /* Process complete lines */
    756             p++;
    757             line = PyString_FromStringAndSize(q, p-q);
    758             if (line == NULL)
    759                 goto error;
    760             err = PyList_Append(list, line);
    761             Py_DECREF(line);
    762             if (err != 0)
    763                 goto error;
    764             q = p;
    765             p = memchr(q, '\n', end-q);
    766         }
    767         /* Move the remaining incomplete line to the start */
    768         nfilled = end-q;
    769         memmove(buffer, q, nfilled);
    770         if (sizehint > 0)
    771             if (totalread >= (size_t)sizehint)
    772                 break;
    773         if (shortread) {
    774             sizehint = 0;
    775             break;
    776         }
    777     }
    778     if (nfilled != 0) {
    779         /* Partial last line */
    780         line = PyString_FromStringAndSize(buffer, nfilled);
    781         if (line == NULL)
    782             goto error;
    783         if (sizehint > 0) {
    784             /* Need to complete the last line */
    785             PyObject *rest = Util_GetLine(self, 0);
    786             if (rest == NULL) {
    787                 Py_DECREF(line);
    788                 goto error;
    789             }
    790             PyString_Concat(&line, rest);
    791             Py_DECREF(rest);
    792             if (line == NULL)
    793                 goto error;
    794         }
    795         err = PyList_Append(list, line);
    796         Py_DECREF(line);
    797         if (err != 0)
    798             goto error;
    799     }
    800 
    801   cleanup:
    802     RELEASE_LOCK(self);
    803     if (big_buffer) {
    804         Py_DECREF(big_buffer);
    805     }
    806     return list;
    807 }
    808 
    809 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
    810 "xreadlines() -> self\n\
    811 \n\
    812 For backward compatibility. BZ2File objects now include the performance\n\
    813 optimizations previously implemented in the xreadlines module.\n\
    814 ");
    815 
    816 PyDoc_STRVAR(BZ2File_write__doc__,
    817 "write(data) -> None\n\
    818 \n\
    819 Write the 'data' string to file. Note that due to buffering, close() may\n\
    820 be needed before the file on disk reflects the data written.\n\
    821 ");
    822 
    823 /* This is a hacked version of Python's fileobject.c:file_write(). */
    824 static PyObject *
    825 BZ2File_write(BZ2FileObject *self, PyObject *args)
    826 {
    827     PyObject *ret = NULL;
    828     Py_buffer pbuf;
    829     char *buf;
    830     int len;
    831     int bzerror;
    832 
    833     if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
    834         return NULL;
    835     buf = pbuf.buf;
    836     len = pbuf.len;
    837 
    838     ACQUIRE_LOCK(self);
    839     switch (self->mode) {
    840         case MODE_WRITE:
    841             break;
    842 
    843         case MODE_CLOSED:
    844             PyErr_SetString(PyExc_ValueError,
    845                             "I/O operation on closed file");
    846             goto cleanup;
    847 
    848         default:
    849             PyErr_SetString(PyExc_IOError,
    850                             "file is not ready for writing");
    851             goto cleanup;
    852     }
    853 
    854     self->f_softspace = 0;
    855 
    856     Py_BEGIN_ALLOW_THREADS
    857     BZ2_bzWrite (&bzerror, self->fp, buf, len);
    858     self->pos += len;
    859     Py_END_ALLOW_THREADS
    860 
    861     if (bzerror != BZ_OK) {
    862         Util_CatchBZ2Error(bzerror);
    863         goto cleanup;
    864     }
    865 
    866     Py_INCREF(Py_None);
    867     ret = Py_None;
    868 
    869 cleanup:
    870     PyBuffer_Release(&pbuf);
    871     RELEASE_LOCK(self);
    872     return ret;
    873 }
    874 
    875 PyDoc_STRVAR(BZ2File_writelines__doc__,
    876 "writelines(sequence_of_strings) -> None\n\
    877 \n\
    878 Write the sequence of strings to the file. Note that newlines are not\n\
    879 added. The sequence can be any iterable object producing strings. This is\n\
    880 equivalent to calling write() for each string.\n\
    881 ");
    882 
    883 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
    884 static PyObject *
    885 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
    886 {
    887 #define CHUNKSIZE 1000
    888     PyObject *list = NULL;
    889     PyObject *iter = NULL;
    890     PyObject *ret = NULL;
    891     PyObject *line;
    892     int i, j, index, len, islist;
    893     int bzerror;
    894 
    895     ACQUIRE_LOCK(self);
    896     switch (self->mode) {
    897         case MODE_WRITE:
    898             break;
    899 
    900         case MODE_CLOSED:
    901             PyErr_SetString(PyExc_ValueError,
    902                             "I/O operation on closed file");
    903             goto error;
    904 
    905         default:
    906             PyErr_SetString(PyExc_IOError,
    907                             "file is not ready for writing");
    908             goto error;
    909     }
    910 
    911     islist = PyList_Check(seq);
    912     if  (!islist) {
    913         iter = PyObject_GetIter(seq);
    914         if (iter == NULL) {
    915             PyErr_SetString(PyExc_TypeError,
    916                 "writelines() requires an iterable argument");
    917             goto error;
    918         }
    919         list = PyList_New(CHUNKSIZE);
    920         if (list == NULL)
    921             goto error;
    922     }
    923 
    924     /* Strategy: slurp CHUNKSIZE lines into a private list,
    925        checking that they are all strings, then write that list
    926        without holding the interpreter lock, then come back for more. */
    927     for (index = 0; ; index += CHUNKSIZE) {
    928         if (islist) {
    929             Py_XDECREF(list);
    930             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
    931             if (list == NULL)
    932                 goto error;
    933             j = PyList_GET_SIZE(list);
    934         }
    935         else {
    936             for (j = 0; j < CHUNKSIZE; j++) {
    937                 line = PyIter_Next(iter);
    938                 if (line == NULL) {
    939                     if (PyErr_Occurred())
    940                         goto error;
    941                     break;
    942                 }
    943                 PyList_SetItem(list, j, line);
    944             }
    945         }
    946         if (j == 0)
    947             break;
    948 
    949         /* Check that all entries are indeed strings. If not,
    950            apply the same rules as for file.write() and
    951            convert the rets to strings. This is slow, but
    952            seems to be the only way since all conversion APIs
    953            could potentially execute Python code. */
    954         for (i = 0; i < j; i++) {
    955             PyObject *v = PyList_GET_ITEM(list, i);
    956             if (!PyString_Check(v)) {
    957                 const char *buffer;
    958                 Py_ssize_t len;
    959                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
    960                     PyErr_SetString(PyExc_TypeError,
    961                                     "writelines() "
    962                                     "argument must be "
    963                                     "a sequence of "
    964                                     "strings");
    965                     goto error;
    966                 }
    967                 line = PyString_FromStringAndSize(buffer,
    968                                                   len);
    969                 if (line == NULL)
    970                     goto error;
    971                 Py_DECREF(v);
    972                 PyList_SET_ITEM(list, i, line);
    973             }
    974         }
    975 
    976         self->f_softspace = 0;
    977 
    978         /* Since we are releasing the global lock, the
    979            following code may *not* execute Python code. */
    980         Py_BEGIN_ALLOW_THREADS
    981         for (i = 0; i < j; i++) {
    982             line = PyList_GET_ITEM(list, i);
    983             len = PyString_GET_SIZE(line);
    984             BZ2_bzWrite (&bzerror, self->fp,
    985                          PyString_AS_STRING(line), len);
    986             if (bzerror != BZ_OK) {
    987                 Py_BLOCK_THREADS
    988                 Util_CatchBZ2Error(bzerror);
    989                 goto error;
    990             }
    991         }
    992         Py_END_ALLOW_THREADS
    993 
    994         if (j < CHUNKSIZE)
    995             break;
    996     }
    997 
    998     Py_INCREF(Py_None);
    999     ret = Py_None;
   1000 
   1001   error:
   1002     RELEASE_LOCK(self);
   1003     Py_XDECREF(list);
   1004     Py_XDECREF(iter);
   1005     return ret;
   1006 #undef CHUNKSIZE
   1007 }
   1008 
PyDoc_STRVAR(BZ2File_seek__doc__,
"seek(offset [, whence]) -> None\n\
\n\
Move to new file position. Argument offset is a byte count. Optional\n\
argument whence defaults to 0 (offset from start of file, offset\n\
should be >= 0); other values are 1 (move relative to current position,\n\
positive or negative), and 2 (move relative to end of file, usually\n\
negative, although many platforms allow seeking beyond the end of a file).\n\
\n\
Note that seeking of bz2 files is emulated, and depending on the parameters\n\
the operation may be extremely slow.\n\
");

/* seek(offset[, whence]): emulated seek for a file opened for reading.
 *
 * The target is first turned into an absolute position:
 *   whence == 2  relative to the uncompressed size; if that size is not yet
 *                known (self->size == -1) the stream is read to its end
 *                first to find it out;
 *   whence == 1  relative to self->pos;
 *   any other    `offset` is used as the absolute position as-is.
 *
 * A target at or after self->pos is reached by reading and discarding data.
 * A target before self->pos closes the bz2 read handle, seeks the
 * underlying file object to 0, reopens the bz2 stream and then reads
 * forward from the start.
 *
 * Returns None on success.  On failure returns NULL with an exception set:
 * ValueError if the file is closed, IOError if it is not in a read mode,
 * or whatever the offset conversion, the underlying file's seek() or
 * Util_CatchBZ2Error() raise.
 */
static PyObject *
BZ2File_seek(BZ2FileObject *self, PyObject *args)
{
    int where = 0;
    PyObject *offobj;
    Py_off_t offset;
    char small_buffer[SMALLCHUNK];
    char *buffer = small_buffer;
    size_t buffersize = SMALLCHUNK;
    Py_off_t bytesread = 0;
    size_t readsize;
    int chunksize;
    int bzerror;
    PyObject *ret = NULL;

    if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
        return NULL;
#if !defined(HAVE_LARGEFILE_SUPPORT)
    offset = PyInt_AsLong(offobj);
#else
    offset = PyLong_Check(offobj) ?
        PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
#endif
    /* The conversions above report failure only through the error
       indicator, so it is checked explicitly. */
    if (PyErr_Occurred())
        return NULL;

    ACQUIRE_LOCK(self);
    /* Note: the read-ahead buffer is dropped before the mode is checked,
       i.e. also when the call ends up failing below. */
    Util_DropReadAhead(self);
    switch (self->mode) {
        case MODE_READ:
        case MODE_READ_EOF:
            break;

        case MODE_CLOSED:
            PyErr_SetString(PyExc_ValueError,
                            "I/O operation on closed file");
            goto cleanup;

        default:
            PyErr_SetString(PyExc_IOError,
                            "seek works only while reading");
            goto cleanup;
    }

    if (where == 2) {
        if (self->size == -1) {
            /* Size not known yet: read (and discard) up to the end of the
               stream; self->pos then equals the uncompressed size. */
            assert(self->mode != MODE_READ_EOF);
            for (;;) {
                Py_BEGIN_ALLOW_THREADS
                chunksize = Util_UnivNewlineRead(
                                &bzerror, self->fp,
                                buffer, buffersize,
                                self);
                self->pos += chunksize;
                Py_END_ALLOW_THREADS

                bytesread += chunksize;
                if (bzerror == BZ_STREAM_END) {
                    break;
                } else if (bzerror != BZ_OK) {
                    Util_CatchBZ2Error(bzerror);
                    goto cleanup;
                }
            }
            self->mode = MODE_READ_EOF;
            self->size = self->pos;
            /* Reset the counter: it is reused by the forward-walk loop
               at the end of the function. */
            bytesread = 0;
        }
        offset = self->size + offset;
    } else if (where == 1) {
        offset = self->pos + offset;
    }

    /* Before getting here, offset must be the absolute position the file
     * pointer should be set to. */

    if (offset >= self->pos) {
        /* we can move forward */
        offset -= self->pos;
    } else {
        /* we cannot move back, so rewind the stream */
        BZ2_bzReadClose(&bzerror, self->fp);
        if (self->fp) {
            PyFile_DecUseCount((PyFileObject *)self->file);
            self->fp = NULL;
        }
        if (bzerror != BZ_OK) {
            Util_CatchBZ2Error(bzerror);
            goto cleanup;
        }
        /* Rewind the underlying file object through its own seek(). */
        ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
        if (!ret)
            goto cleanup;
        /* Only success matters; reset ret so a later failure still
           returns NULL. */
        Py_DECREF(ret);
        ret = NULL;
        self->pos = 0;
        self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
                                  0, 0, NULL, 0);
        if (self->fp)
            PyFile_IncUseCount((PyFileObject *)self->file);
        if (bzerror != BZ_OK) {
            Util_CatchBZ2Error(bzerror);
            goto cleanup;
        }
        self->mode = MODE_READ;
    }

    /* Nothing to skip (this also covers a negative absolute target after
       the rewind), or the stream is already at its end. */
    if (offset <= 0 || self->mode == MODE_READ_EOF)
        goto exit;

    /* Before getting here, offset must be set to the number of bytes
     * to walk forward. */
    for (;;) {
        if (offset-bytesread > buffersize)
            readsize = buffersize;
        else
            /* offset might be wider than readsize, but the result
             * of the subtraction is bound by buffersize (see the
             * condition above). buffersize is 8192. */
            readsize = (size_t)(offset-bytesread);
        Py_BEGIN_ALLOW_THREADS
        chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
                                         buffer, readsize, self);
        self->pos += chunksize;
        Py_END_ALLOW_THREADS
        bytesread += chunksize;
        if (bzerror == BZ_STREAM_END) {
            /* Hit the end before the target: remember the size and stay
               at end of stream. */
            self->size = self->pos;
            self->mode = MODE_READ_EOF;
            break;
        } else if (bzerror != BZ_OK) {
            Util_CatchBZ2Error(bzerror);
            goto cleanup;
        }
        if (bytesread == offset)
            break;
    }

exit:
    Py_INCREF(Py_None);
    ret = Py_None;

cleanup:
    RELEASE_LOCK(self);
    return ret;
}
   1168 
   1169 PyDoc_STRVAR(BZ2File_tell__doc__,
   1170 "tell() -> int\n\
   1171 \n\
   1172 Return the current file position, an integer (may be a long integer).\n\
   1173 ");
   1174 
   1175 static PyObject *
   1176 BZ2File_tell(BZ2FileObject *self, PyObject *args)
   1177 {
   1178     PyObject *ret = NULL;
   1179 
   1180     if (self->mode == MODE_CLOSED) {
   1181         PyErr_SetString(PyExc_ValueError,
   1182                         "I/O operation on closed file");
   1183         goto cleanup;
   1184     }
   1185 
   1186 #if !defined(HAVE_LARGEFILE_SUPPORT)
   1187     ret = PyInt_FromLong(self->pos);
   1188 #else
   1189     ret = PyLong_FromLongLong(self->pos);
   1190 #endif
   1191 
   1192 cleanup:
   1193     return ret;
   1194 }
   1195 
   1196 PyDoc_STRVAR(BZ2File_close__doc__,
   1197 "close() -> None or (perhaps) an integer\n\
   1198 \n\
   1199 Close the file. Sets data attribute .closed to true. A closed file\n\
   1200 cannot be used for further I/O operations. close() may be called more\n\
   1201 than once without error.\n\
   1202 ");
   1203 
   1204 static PyObject *
   1205 BZ2File_close(BZ2FileObject *self)
   1206 {
   1207     PyObject *ret = NULL;
   1208     int bzerror = BZ_OK;
   1209 
   1210     ACQUIRE_LOCK(self);
   1211     switch (self->mode) {
   1212         case MODE_READ:
   1213         case MODE_READ_EOF:
   1214             BZ2_bzReadClose(&bzerror, self->fp);
   1215             break;
   1216         case MODE_WRITE:
   1217             BZ2_bzWriteClose(&bzerror, self->fp,
   1218                              0, NULL, NULL);
   1219             break;
   1220     }
   1221     if (self->fp) {
   1222         PyFile_DecUseCount((PyFileObject *)self->file);
   1223         self->fp = NULL;
   1224     }
   1225     self->mode = MODE_CLOSED;
   1226     ret = PyObject_CallMethod(self->file, "close", NULL);
   1227     if (bzerror != BZ_OK) {
   1228         Util_CatchBZ2Error(bzerror);
   1229         Py_XDECREF(ret);
   1230         ret = NULL;
   1231     }
   1232 
   1233     RELEASE_LOCK(self);
   1234     return ret;
   1235 }
   1236 
   1237 PyDoc_STRVAR(BZ2File_enter_doc,
   1238 "__enter__() -> self.");
   1239 
   1240 static PyObject *
   1241 BZ2File_enter(BZ2FileObject *self)
   1242 {
   1243     if (self->mode == MODE_CLOSED) {
   1244         PyErr_SetString(PyExc_ValueError,
   1245             "I/O operation on closed file");
   1246         return NULL;
   1247     }
   1248     Py_INCREF(self);
   1249     return (PyObject *) self;
   1250 }
   1251 
   1252 PyDoc_STRVAR(BZ2File_exit_doc,
   1253 "__exit__(*excinfo) -> None.  Closes the file.");
   1254 
   1255 static PyObject *
   1256 BZ2File_exit(BZ2FileObject *self, PyObject *args)
   1257 {
   1258     PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
   1259     if (!ret)
   1260         /* If error occurred, pass through */
   1261         return NULL;
   1262     Py_DECREF(ret);
   1263     Py_RETURN_NONE;
   1264 }
   1265 
   1266 
/* Forward declaration: BZ2File_getiter() is defined further down but is
 * referenced by the "xreadlines" entry of the table below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table of BZ2File objects.  Each entry gives the Python-visible
 * name, the C implementation, its calling convention and its docstring;
 * the table ends with a NULL sentinel.  "xreadlines" shares its
 * implementation with the iterator slot (BZ2File_getiter). */
static PyMethodDef BZ2File_methods[] = {
    {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
    {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
    {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
    {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
    {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
    {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
    {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
    {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
    {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
    {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
    {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
    {NULL,              NULL}           /* sentinel */
};
   1283 
   1284 
   1285 /* ===================================================================== */
   1286 /* Getters and setters of BZ2File. */
   1287 
   1288 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
   1289 static PyObject *
   1290 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
   1291 {
   1292     switch (self->f_newlinetypes) {
   1293     case NEWLINE_UNKNOWN:
   1294         Py_INCREF(Py_None);
   1295         return Py_None;
   1296     case NEWLINE_CR:
   1297         return PyString_FromString("\r");
   1298     case NEWLINE_LF:
   1299         return PyString_FromString("\n");
   1300     case NEWLINE_CR|NEWLINE_LF:
   1301         return Py_BuildValue("(ss)", "\r", "\n");
   1302     case NEWLINE_CRLF:
   1303         return PyString_FromString("\r\n");
   1304     case NEWLINE_CR|NEWLINE_CRLF:
   1305         return Py_BuildValue("(ss)", "\r", "\r\n");
   1306     case NEWLINE_LF|NEWLINE_CRLF:
   1307         return Py_BuildValue("(ss)", "\n", "\r\n");
   1308     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
   1309         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
   1310     default:
   1311         PyErr_Format(PyExc_SystemError,
   1312                      "Unknown newlines value 0x%x\n",
   1313                      self->f_newlinetypes);
   1314         return NULL;
   1315     }
   1316 }
   1317 
   1318 static PyObject *
   1319 BZ2File_get_closed(BZ2FileObject *self, void *closure)
   1320 {
   1321     return PyInt_FromLong(self->mode == MODE_CLOSED);
   1322 }
   1323 
   1324 static PyObject *
   1325 BZ2File_get_mode(BZ2FileObject *self, void *closure)
   1326 {
   1327     return PyObject_GetAttrString(self->file, "mode");
   1328 }
   1329 
   1330 static PyObject *
   1331 BZ2File_get_name(BZ2FileObject *self, void *closure)
   1332 {
   1333     return PyObject_GetAttrString(self->file, "name");
   1334 }
   1335 
/* Computed attributes of BZ2File objects.  Every entry has a getter and a
 * NULL setter, so all four attributes are read-only; the table ends with a
 * NULL sentinel. */
static PyGetSetDef BZ2File_getset[] = {
    {"closed", (getter)BZ2File_get_closed, NULL,
                    "True if the file is closed"},
    {"newlines", (getter)BZ2File_get_newlines, NULL,
                    "end-of-line convention used in this file"},
    {"mode", (getter)BZ2File_get_mode, NULL,
                    "file mode ('r', 'w', or 'U')"},
    {"name", (getter)BZ2File_get_name, NULL,
                    "file name"},
    {NULL}      /* Sentinel */
};
   1347 
   1348 
   1349 /* ===================================================================== */
   1350 /* Members of BZ2File_Type. */
   1351 
/* OFF(x): byte offset of field x inside BZ2FileObject.  Any earlier
 * definition of OFF is discarded first. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Attributes mapped directly onto struct fields.  `softspace` exposes the
 * f_softspace int with flags 0; the table ends with a NULL sentinel. */
static PyMemberDef BZ2File_members[] = {
    {"softspace",       T_INT,          OFF(f_softspace), 0,
     "flag indicating that a space needs to be printed; used by print"},
    {NULL}      /* Sentinel */
};
   1360 
   1361 /* ===================================================================== */
   1362 /* Slot definitions for BZ2File_Type. */
   1363 
   1364 static int
   1365 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
   1366 {
   1367     static char *kwlist[] = {"filename", "mode", "buffering",
   1368                                    "compresslevel", 0};
   1369     PyObject *name;
   1370     char *mode = "r";
   1371     int buffering = -1;
   1372     int compresslevel = 9;
   1373     int bzerror;
   1374     int mode_char = 0;
   1375 
   1376     self->size = -1;
   1377 
   1378     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
   1379                                      kwlist, &name, &mode, &buffering,
   1380                                      &compresslevel))
   1381         return -1;
   1382 
   1383     if (compresslevel < 1 || compresslevel > 9) {
   1384         PyErr_SetString(PyExc_ValueError,
   1385                         "compresslevel must be between 1 and 9");
   1386         return -1;
   1387     }
   1388 
   1389     for (;;) {
   1390         int error = 0;
   1391         switch (*mode) {
   1392             case 'r':
   1393             case 'w':
   1394                 if (mode_char)
   1395                     error = 1;
   1396                 mode_char = *mode;
   1397                 break;
   1398 
   1399             case 'b':
   1400                 break;
   1401 
   1402             case 'U':
   1403 #ifdef __VMS
   1404                 self->f_univ_newline = 0;
   1405 #else
   1406                 self->f_univ_newline = 1;
   1407 #endif
   1408                 break;
   1409 
   1410             default:
   1411                 error = 1;
   1412                 break;
   1413         }
   1414         if (error) {
   1415             PyErr_Format(PyExc_ValueError,
   1416                          "invalid mode char %c", *mode);
   1417             return -1;
   1418         }
   1419         mode++;
   1420         if (*mode == '\0')
   1421             break;
   1422     }
   1423 
   1424     if (mode_char == 0) {
   1425         mode_char = 'r';
   1426     }
   1427 
   1428     mode = (mode_char == 'r') ? "rb" : "wb";
   1429 
   1430     self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
   1431                                        name, mode, buffering);
   1432     if (self->file == NULL)
   1433         return -1;
   1434 
   1435     /* From now on, we have stuff to dealloc, so jump to error label
   1436      * instead of returning */
   1437 
   1438 #ifdef WITH_THREAD
   1439     self->lock = PyThread_allocate_lock();
   1440     if (!self->lock) {
   1441         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
   1442         goto error;
   1443     }
   1444 #endif
   1445 
   1446     if (mode_char == 'r')
   1447         self->fp = BZ2_bzReadOpen(&bzerror,
   1448                                   PyFile_AsFile(self->file),
   1449                                   0, 0, NULL, 0);
   1450     else
   1451         self->fp = BZ2_bzWriteOpen(&bzerror,
   1452                                    PyFile_AsFile(self->file),
   1453                                    compresslevel, 0, 0);
   1454 
   1455     if (bzerror != BZ_OK) {
   1456         Util_CatchBZ2Error(bzerror);
   1457         goto error;
   1458     }
   1459     PyFile_IncUseCount((PyFileObject *)self->file);
   1460 
   1461     self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
   1462 
   1463     return 0;
   1464 
   1465 error:
   1466     Py_CLEAR(self->file);
   1467 #ifdef WITH_THREAD
   1468     if (self->lock) {
   1469         PyThread_free_lock(self->lock);
   1470         self->lock = NULL;
   1471     }
   1472 #endif
   1473     return -1;
   1474 }
   1475 
   1476 static void
   1477 BZ2File_dealloc(BZ2FileObject *self)
   1478 {
   1479     int bzerror;
   1480 #ifdef WITH_THREAD
   1481     if (self->lock)
   1482         PyThread_free_lock(self->lock);
   1483 #endif
   1484     switch (self->mode) {
   1485         case MODE_READ:
   1486         case MODE_READ_EOF:
   1487             BZ2_bzReadClose(&bzerror, self->fp);
   1488             break;
   1489         case MODE_WRITE:
   1490             BZ2_bzWriteClose(&bzerror, self->fp,
   1491                              0, NULL, NULL);
   1492             break;
   1493     }
   1494     if (self->fp) {
   1495         PyFile_DecUseCount((PyFileObject *)self->file);
   1496         self->fp = NULL;
   1497     }
   1498     Util_DropReadAhead(self);
   1499     Py_XDECREF(self->file);
   1500     Py_TYPE(self)->tp_free((PyObject *)self);
   1501 }
   1502 
   1503 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
   1504 static PyObject *
   1505 BZ2File_getiter(BZ2FileObject *self)
   1506 {
   1507     if (self->mode == MODE_CLOSED) {
   1508         PyErr_SetString(PyExc_ValueError,
   1509                         "I/O operation on closed file");
   1510         return NULL;
   1511     }
   1512     Py_INCREF((PyObject*)self);
   1513     return (PyObject *)self;
   1514 }
   1515 
   1516 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
   1517 #define READAHEAD_BUFSIZE 8192
   1518 static PyObject *
   1519 BZ2File_iternext(BZ2FileObject *self)
   1520 {
   1521     PyStringObject* ret;
   1522     ACQUIRE_LOCK(self);
   1523     if (self->mode == MODE_CLOSED) {
   1524         RELEASE_LOCK(self);
   1525         PyErr_SetString(PyExc_ValueError,
   1526                         "I/O operation on closed file");
   1527         return NULL;
   1528     }
   1529     ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
   1530     RELEASE_LOCK(self);
   1531     if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
   1532         Py_XDECREF(ret);
   1533         return NULL;
   1534     }
   1535     return (PyObject *)ret;
   1536 }
   1537 
   1538 /* ===================================================================== */
   1539 /* BZ2File_Type definition. */
   1540 
/* Class docstring of bz2.BZ2File, assembled from two adjacent PyDoc_STR
 * literals.
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init() starts its `buffering` local at -1 before parsing the
 * arguments -- confirm which default the text should state. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;
   1561 
/* Type object for bz2.BZ2File.
 *
 * Slots that are filled in: deallocation (BZ2File_dealloc), generic
 * attribute get/set, the iterator protocol (BZ2File_getiter /
 * BZ2File_iternext), the method, member and getset tables, tp_init
 * (BZ2File_init) and the generic alloc/new/free functions.  The flags
 * include Py_TPFLAGS_BASETYPE, so the type can be subclassed.  All other
 * slots are left at 0.
 */
static PyTypeObject BZ2File_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2File",              /*tp_name*/
    sizeof(BZ2FileObject),      /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2File_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2File__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    (getiterfunc)BZ2File_getiter, /*tp_iter*/
    (iternextfunc)BZ2File_iternext, /*tp_iternext*/
    BZ2File_methods,        /*tp_methods*/
    BZ2File_members,        /*tp_members*/
    BZ2File_getset,         /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2File_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
   1604 
   1605 
   1606 /* ===================================================================== */
   1607 /* Methods of BZ2Comp. */
   1608 
   1609 PyDoc_STRVAR(BZ2Comp_compress__doc__,
   1610 "compress(data) -> string\n\
   1611 \n\
   1612 Provide more data to the compressor object. It will return chunks of\n\
   1613 compressed data whenever possible. When you've finished providing data\n\
   1614 to compress, call the flush() method to finish the compression process,\n\
   1615 and return what is left in the internal buffers.\n\
   1616 ");
   1617 
   1618 static PyObject *
   1619 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
   1620 {
   1621     Py_buffer pdata;
   1622     char *data;
   1623     int datasize;
   1624     int bufsize = SMALLCHUNK;
   1625     PY_LONG_LONG totalout;
   1626     PyObject *ret = NULL;
   1627     bz_stream *bzs = &self->bzs;
   1628     int bzerror;
   1629 
   1630     if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
   1631         return NULL;
   1632     data = pdata.buf;
   1633     datasize = pdata.len;
   1634 
   1635     if (datasize == 0) {
   1636         PyBuffer_Release(&pdata);
   1637         return PyString_FromString("");
   1638     }
   1639 
   1640     ACQUIRE_LOCK(self);
   1641     if (!self->running) {
   1642         PyErr_SetString(PyExc_ValueError,
   1643                         "this object was already flushed");
   1644         goto error;
   1645     }
   1646 
   1647     ret = PyString_FromStringAndSize(NULL, bufsize);
   1648     if (!ret)
   1649         goto error;
   1650 
   1651     bzs->next_in = data;
   1652     bzs->avail_in = datasize;
   1653     bzs->next_out = BUF(ret);
   1654     bzs->avail_out = bufsize;
   1655 
   1656     totalout = BZS_TOTAL_OUT(bzs);
   1657 
   1658     for (;;) {
   1659         Py_BEGIN_ALLOW_THREADS
   1660         bzerror = BZ2_bzCompress(bzs, BZ_RUN);
   1661         Py_END_ALLOW_THREADS
   1662         if (bzerror != BZ_RUN_OK) {
   1663             Util_CatchBZ2Error(bzerror);
   1664             goto error;
   1665         }
   1666         if (bzs->avail_in == 0)
   1667             break; /* no more input data */
   1668         if (bzs->avail_out == 0) {
   1669             bufsize = Util_NewBufferSize(bufsize);
   1670             if (_PyString_Resize(&ret, bufsize) < 0) {
   1671                 BZ2_bzCompressEnd(bzs);
   1672                 goto error;
   1673             }
   1674             bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
   1675                                         - totalout);
   1676             bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
   1677         }
   1678     }
   1679 
   1680     _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
   1681 
   1682     RELEASE_LOCK(self);
   1683     PyBuffer_Release(&pdata);
   1684     return ret;
   1685 
   1686 error:
   1687     RELEASE_LOCK(self);
   1688     PyBuffer_Release(&pdata);
   1689     Py_XDECREF(ret);
   1690     return NULL;
   1691 }
   1692 
   1693 PyDoc_STRVAR(BZ2Comp_flush__doc__,
   1694 "flush() -> string\n\
   1695 \n\
   1696 Finish the compression process and return what is left in internal buffers.\n\
   1697 You must not use the compressor object after calling this method.\n\
   1698 ");
   1699 
   1700 static PyObject *
   1701 BZ2Comp_flush(BZ2CompObject *self)
   1702 {
   1703     int bufsize = SMALLCHUNK;
   1704     PyObject *ret = NULL;
   1705     bz_stream *bzs = &self->bzs;
   1706     PY_LONG_LONG totalout;
   1707     int bzerror;
   1708 
   1709     ACQUIRE_LOCK(self);
   1710     if (!self->running) {
   1711         PyErr_SetString(PyExc_ValueError, "object was already "
   1712                                           "flushed");
   1713         goto error;
   1714     }
   1715     self->running = 0;
   1716 
   1717     ret = PyString_FromStringAndSize(NULL, bufsize);
   1718     if (!ret)
   1719         goto error;
   1720 
   1721     bzs->next_out = BUF(ret);
   1722     bzs->avail_out = bufsize;
   1723 
   1724     totalout = BZS_TOTAL_OUT(bzs);
   1725 
   1726     for (;;) {
   1727         Py_BEGIN_ALLOW_THREADS
   1728         bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
   1729         Py_END_ALLOW_THREADS
   1730         if (bzerror == BZ_STREAM_END) {
   1731             break;
   1732         } else if (bzerror != BZ_FINISH_OK) {
   1733             Util_CatchBZ2Error(bzerror);
   1734             goto error;
   1735         }
   1736         if (bzs->avail_out == 0) {
   1737             bufsize = Util_NewBufferSize(bufsize);
   1738             if (_PyString_Resize(&ret, bufsize) < 0)
   1739                 goto error;
   1740             bzs->next_out = BUF(ret);
   1741             bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
   1742                                         - totalout);
   1743             bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
   1744         }
   1745     }
   1746 
   1747     if (bzs->avail_out != 0)
   1748         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
   1749 
   1750     RELEASE_LOCK(self);
   1751     return ret;
   1752 
   1753 error:
   1754     RELEASE_LOCK(self);
   1755     Py_XDECREF(ret);
   1756     return NULL;
   1757 }
   1758 
   1759 static PyMethodDef BZ2Comp_methods[] = {
   1760     {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
   1761      BZ2Comp_compress__doc__},
   1762     {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
   1763      BZ2Comp_flush__doc__},
   1764     {NULL,              NULL}           /* sentinel */
   1765 };
   1766 
   1767 
   1768 /* ===================================================================== */
   1769 /* Slot definitions for BZ2Comp_Type. */
   1770 
   1771 static int
   1772 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
   1773 {
   1774     int compresslevel = 9;
   1775     int bzerror;
   1776     static char *kwlist[] = {"compresslevel", 0};
   1777 
   1778     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
   1779                                      kwlist, &compresslevel))
   1780         return -1;
   1781 
   1782     if (compresslevel < 1 || compresslevel > 9) {
   1783         PyErr_SetString(PyExc_ValueError,
   1784                         "compresslevel must be between 1 and 9");
   1785         goto error;
   1786     }
   1787 
   1788 #ifdef WITH_THREAD
   1789     self->lock = PyThread_allocate_lock();
   1790     if (!self->lock) {
   1791         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
   1792         goto error;
   1793     }
   1794 #endif
   1795 
   1796     memset(&self->bzs, 0, sizeof(bz_stream));
   1797     bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
   1798     if (bzerror != BZ_OK) {
   1799         Util_CatchBZ2Error(bzerror);
   1800         goto error;
   1801     }
   1802 
   1803     self->running = 1;
   1804 
   1805     return 0;
   1806 error:
   1807 #ifdef WITH_THREAD
   1808     if (self->lock) {
   1809         PyThread_free_lock(self->lock);
   1810         self->lock = NULL;
   1811     }
   1812 #endif
   1813     return -1;
   1814 }
   1815 
   1816 static void
   1817 BZ2Comp_dealloc(BZ2CompObject *self)
   1818 {
   1819 #ifdef WITH_THREAD
   1820     if (self->lock)
   1821         PyThread_free_lock(self->lock);
   1822 #endif
   1823     BZ2_bzCompressEnd(&self->bzs);
   1824     Py_TYPE(self)->tp_free((PyObject *)self);
   1825 }
   1826 
   1827 
   1828 /* ===================================================================== */
   1829 /* BZ2Comp_Type definition. */
   1830 
   1831 PyDoc_STRVAR(BZ2Comp__doc__,
   1832 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
   1833 \n\
   1834 Create a new compressor object. This object may be used to compress\n\
   1835 data sequentially. If you want to compress data in one shot, use the\n\
   1836 compress() function instead. The compresslevel parameter, if given,\n\
   1837 must be a number between 1 and 9.\n\
   1838 ");
   1839 
   1840 static PyTypeObject BZ2Comp_Type = {
   1841     PyVarObject_HEAD_INIT(NULL, 0)
   1842     "bz2.BZ2Compressor",        /*tp_name*/
   1843     sizeof(BZ2CompObject),      /*tp_basicsize*/
   1844     0,                          /*tp_itemsize*/
   1845     (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
   1846     0,                          /*tp_print*/
   1847     0,                          /*tp_getattr*/
   1848     0,                          /*tp_setattr*/
   1849     0,                          /*tp_compare*/
   1850     0,                          /*tp_repr*/
   1851     0,                          /*tp_as_number*/
   1852     0,                          /*tp_as_sequence*/
   1853     0,                          /*tp_as_mapping*/
   1854     0,                          /*tp_hash*/
   1855     0,                      /*tp_call*/
   1856     0,                      /*tp_str*/
   1857     PyObject_GenericGetAttr,/*tp_getattro*/
   1858     PyObject_GenericSetAttr,/*tp_setattro*/
   1859     0,                      /*tp_as_buffer*/
   1860     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
   1861     BZ2Comp__doc__,         /*tp_doc*/
   1862     0,                      /*tp_traverse*/
   1863     0,                      /*tp_clear*/
   1864     0,                      /*tp_richcompare*/
   1865     0,                      /*tp_weaklistoffset*/
   1866     0,                      /*tp_iter*/
   1867     0,                      /*tp_iternext*/
   1868     BZ2Comp_methods,        /*tp_methods*/
   1869     0,                      /*tp_members*/
   1870     0,                      /*tp_getset*/
   1871     0,                      /*tp_base*/
   1872     0,                      /*tp_dict*/
   1873     0,                      /*tp_descr_get*/
   1874     0,                      /*tp_descr_set*/
   1875     0,                      /*tp_dictoffset*/
   1876     (initproc)BZ2Comp_init, /*tp_init*/
   1877     PyType_GenericAlloc,    /*tp_alloc*/
   1878     PyType_GenericNew,      /*tp_new*/
   1879     _PyObject_Del,          /*tp_free*/
   1880     0,                      /*tp_is_gc*/
   1881 };
   1882 
   1883 
   1884 /* ===================================================================== */
   1885 /* Members of BZ2Decomp. */
   1886 
   1887 #undef OFF
   1888 #define OFF(x) offsetof(BZ2DecompObject, x)
   1889 
   1890 static PyMemberDef BZ2Decomp_members[] = {
   1891     {"unused_data", T_OBJECT, OFF(unused_data), RO},
   1892     {NULL}      /* Sentinel */
   1893 };
   1894 
   1895 
   1896 /* ===================================================================== */
   1897 /* Methods of BZ2Decomp. */
   1898 
   1899 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
   1900 "decompress(data) -> string\n\
   1901 \n\
   1902 Provide more data to the decompressor object. It will return chunks\n\
   1903 of decompressed data whenever possible. If you try to decompress data\n\
   1904 after the end of stream is found, EOFError will be raised. If any data\n\
   1905 was found after the end of stream, it'll be ignored and saved in\n\
   1906 unused_data attribute.\n\
   1907 ");
   1908 
   1909 static PyObject *
   1910 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
   1911 {
   1912     Py_buffer pdata;
   1913     char *data;
   1914     int datasize;
   1915     int bufsize = SMALLCHUNK;
   1916     PY_LONG_LONG totalout;
   1917     PyObject *ret = NULL;
   1918     bz_stream *bzs = &self->bzs;
   1919     int bzerror;
   1920 
   1921     if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
   1922         return NULL;
   1923     data = pdata.buf;
   1924     datasize = pdata.len;
   1925 
   1926     ACQUIRE_LOCK(self);
   1927     if (!self->running) {
   1928         PyErr_SetString(PyExc_EOFError, "end of stream was "
   1929                                         "already found");
   1930         goto error;
   1931     }
   1932 
   1933     ret = PyString_FromStringAndSize(NULL, bufsize);
   1934     if (!ret)
   1935         goto error;
   1936 
   1937     bzs->next_in = data;
   1938     bzs->avail_in = datasize;
   1939     bzs->next_out = BUF(ret);
   1940     bzs->avail_out = bufsize;
   1941 
   1942     totalout = BZS_TOTAL_OUT(bzs);
   1943 
   1944     for (;;) {
   1945         Py_BEGIN_ALLOW_THREADS
   1946         bzerror = BZ2_bzDecompress(bzs);
   1947         Py_END_ALLOW_THREADS
   1948         if (bzerror == BZ_STREAM_END) {
   1949             if (bzs->avail_in != 0) {
   1950                 Py_DECREF(self->unused_data);
   1951                 self->unused_data =
   1952                     PyString_FromStringAndSize(bzs->next_in,
   1953                                                bzs->avail_in);
   1954             }
   1955             self->running = 0;
   1956             break;
   1957         }
   1958         if (bzerror != BZ_OK) {
   1959             Util_CatchBZ2Error(bzerror);
   1960             goto error;
   1961         }
   1962         if (bzs->avail_in == 0)
   1963             break; /* no more input data */
   1964         if (bzs->avail_out == 0) {
   1965             bufsize = Util_NewBufferSize(bufsize);
   1966             if (_PyString_Resize(&ret, bufsize) < 0) {
   1967                 BZ2_bzDecompressEnd(bzs);
   1968                 goto error;
   1969             }
   1970             bzs->next_out = BUF(ret);
   1971             bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
   1972                                         - totalout);
   1973             bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
   1974         }
   1975     }
   1976 
   1977     if (bzs->avail_out != 0)
   1978         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
   1979 
   1980     RELEASE_LOCK(self);
   1981     PyBuffer_Release(&pdata);
   1982     return ret;
   1983 
   1984 error:
   1985     RELEASE_LOCK(self);
   1986     PyBuffer_Release(&pdata);
   1987     Py_XDECREF(ret);
   1988     return NULL;
   1989 }
   1990 
   1991 static PyMethodDef BZ2Decomp_methods[] = {
   1992     {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
   1993     {NULL,              NULL}           /* sentinel */
   1994 };
   1995 
   1996 
   1997 /* ===================================================================== */
   1998 /* Slot definitions for BZ2Decomp_Type. */
   1999 
   2000 static int
   2001 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
   2002 {
   2003     int bzerror;
   2004 
   2005     if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
   2006         return -1;
   2007 
   2008 #ifdef WITH_THREAD
   2009     self->lock = PyThread_allocate_lock();
   2010     if (!self->lock) {
   2011         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
   2012         goto error;
   2013     }
   2014 #endif
   2015 
   2016     self->unused_data = PyString_FromString("");
   2017     if (!self->unused_data)
   2018         goto error;
   2019 
   2020     memset(&self->bzs, 0, sizeof(bz_stream));
   2021     bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
   2022     if (bzerror != BZ_OK) {
   2023         Util_CatchBZ2Error(bzerror);
   2024         goto error;
   2025     }
   2026 
   2027     self->running = 1;
   2028 
   2029     return 0;
   2030 
   2031 error:
   2032 #ifdef WITH_THREAD
   2033     if (self->lock) {
   2034         PyThread_free_lock(self->lock);
   2035         self->lock = NULL;
   2036     }
   2037 #endif
   2038     Py_CLEAR(self->unused_data);
   2039     return -1;
   2040 }
   2041 
   2042 static void
   2043 BZ2Decomp_dealloc(BZ2DecompObject *self)
   2044 {
   2045 #ifdef WITH_THREAD
   2046     if (self->lock)
   2047         PyThread_free_lock(self->lock);
   2048 #endif
   2049     Py_XDECREF(self->unused_data);
   2050     BZ2_bzDecompressEnd(&self->bzs);
   2051     Py_TYPE(self)->tp_free((PyObject *)self);
   2052 }
   2053 
   2054 
   2055 /* ===================================================================== */
   2056 /* BZ2Decomp_Type definition. */
   2057 
   2058 PyDoc_STRVAR(BZ2Decomp__doc__,
   2059 "BZ2Decompressor() -> decompressor object\n\
   2060 \n\
   2061 Create a new decompressor object. This object may be used to decompress\n\
   2062 data sequentially. If you want to decompress data in one shot, use the\n\
   2063 decompress() function instead.\n\
   2064 ");
   2065 
   2066 static PyTypeObject BZ2Decomp_Type = {
   2067     PyVarObject_HEAD_INIT(NULL, 0)
   2068     "bz2.BZ2Decompressor",      /*tp_name*/
   2069     sizeof(BZ2DecompObject), /*tp_basicsize*/
   2070     0,                          /*tp_itemsize*/
   2071     (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
   2072     0,                          /*tp_print*/
   2073     0,                          /*tp_getattr*/
   2074     0,                          /*tp_setattr*/
   2075     0,                          /*tp_compare*/
   2076     0,                          /*tp_repr*/
   2077     0,                          /*tp_as_number*/
   2078     0,                          /*tp_as_sequence*/
   2079     0,                          /*tp_as_mapping*/
   2080     0,                          /*tp_hash*/
   2081     0,                      /*tp_call*/
   2082     0,                      /*tp_str*/
   2083     PyObject_GenericGetAttr,/*tp_getattro*/
   2084     PyObject_GenericSetAttr,/*tp_setattro*/
   2085     0,                      /*tp_as_buffer*/
   2086     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
   2087     BZ2Decomp__doc__,       /*tp_doc*/
   2088     0,                      /*tp_traverse*/
   2089     0,                      /*tp_clear*/
   2090     0,                      /*tp_richcompare*/
   2091     0,                      /*tp_weaklistoffset*/
   2092     0,                      /*tp_iter*/
   2093     0,                      /*tp_iternext*/
   2094     BZ2Decomp_methods,      /*tp_methods*/
   2095     BZ2Decomp_members,      /*tp_members*/
   2096     0,                      /*tp_getset*/
   2097     0,                      /*tp_base*/
   2098     0,                      /*tp_dict*/
   2099     0,                      /*tp_descr_get*/
   2100     0,                      /*tp_descr_set*/
   2101     0,                      /*tp_dictoffset*/
   2102     (initproc)BZ2Decomp_init, /*tp_init*/
   2103     PyType_GenericAlloc,    /*tp_alloc*/
   2104     PyType_GenericNew,      /*tp_new*/
   2105     _PyObject_Del,          /*tp_free*/
   2106     0,                      /*tp_is_gc*/
   2107 };
   2108 
   2109 
   2110 /* ===================================================================== */
   2111 /* Module functions. */
   2112 
   2113 PyDoc_STRVAR(bz2_compress__doc__,
   2114 "compress(data [, compresslevel=9]) -> string\n\
   2115 \n\
   2116 Compress data in one shot. If you want to compress data sequentially,\n\
   2117 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
   2118 given, must be a number between 1 and 9.\n\
   2119 ");
   2120 
   2121 static PyObject *
   2122 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
   2123 {
   2124     int compresslevel=9;
   2125     Py_buffer pdata;
   2126     char *data;
   2127     int datasize;
   2128     int bufsize;
   2129     PyObject *ret = NULL;
   2130     bz_stream _bzs;
   2131     bz_stream *bzs = &_bzs;
   2132     int bzerror;
   2133     static char *kwlist[] = {"data", "compresslevel", 0};
   2134 
   2135     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
   2136                                      kwlist, &pdata,
   2137                                      &compresslevel))
   2138         return NULL;
   2139     data = pdata.buf;
   2140     datasize = pdata.len;
   2141 
   2142     if (compresslevel < 1 || compresslevel > 9) {
   2143         PyErr_SetString(PyExc_ValueError,
   2144                         "compresslevel must be between 1 and 9");
   2145         PyBuffer_Release(&pdata);
   2146         return NULL;
   2147     }
   2148 
   2149     /* Conforming to bz2 manual, this is large enough to fit compressed
   2150      * data in one shot. We will check it later anyway. */
   2151     bufsize = datasize + (datasize/100+1) + 600;
   2152 
   2153     ret = PyString_FromStringAndSize(NULL, bufsize);
   2154     if (!ret) {
   2155         PyBuffer_Release(&pdata);
   2156         return NULL;
   2157     }
   2158 
   2159     memset(bzs, 0, sizeof(bz_stream));
   2160 
   2161     bzs->next_in = data;
   2162     bzs->avail_in = datasize;
   2163     bzs->next_out = BUF(ret);
   2164     bzs->avail_out = bufsize;
   2165 
   2166     bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
   2167     if (bzerror != BZ_OK) {
   2168         Util_CatchBZ2Error(bzerror);
   2169         PyBuffer_Release(&pdata);
   2170         Py_DECREF(ret);
   2171         return NULL;
   2172     }
   2173 
   2174     for (;;) {
   2175         Py_BEGIN_ALLOW_THREADS
   2176         bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
   2177         Py_END_ALLOW_THREADS
   2178         if (bzerror == BZ_STREAM_END) {
   2179             break;
   2180         } else if (bzerror != BZ_FINISH_OK) {
   2181             BZ2_bzCompressEnd(bzs);
   2182             Util_CatchBZ2Error(bzerror);
   2183             PyBuffer_Release(&pdata);
   2184             Py_DECREF(ret);
   2185             return NULL;
   2186         }
   2187         if (bzs->avail_out == 0) {
   2188             bufsize = Util_NewBufferSize(bufsize);
   2189             if (_PyString_Resize(&ret, bufsize) < 0) {
   2190                 BZ2_bzCompressEnd(bzs);
   2191                 PyBuffer_Release(&pdata);
   2192                 Py_DECREF(ret);
   2193                 return NULL;
   2194             }
   2195             bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
   2196             bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
   2197         }
   2198     }
   2199 
   2200     if (bzs->avail_out != 0)
   2201         _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
   2202     BZ2_bzCompressEnd(bzs);
   2203 
   2204     PyBuffer_Release(&pdata);
   2205     return ret;
   2206 }
   2207 
   2208 PyDoc_STRVAR(bz2_decompress__doc__,
   2209 "decompress(data) -> decompressed data\n\
   2210 \n\
   2211 Decompress data in one shot. If you want to decompress data sequentially,\n\
   2212 use an instance of BZ2Decompressor instead.\n\
   2213 ");
   2214 
   2215 static PyObject *
   2216 bz2_decompress(PyObject *self, PyObject *args)
   2217 {
   2218     Py_buffer pdata;
   2219     char *data;
   2220     int datasize;
   2221     int bufsize = SMALLCHUNK;
   2222     PyObject *ret;
   2223     bz_stream _bzs;
   2224     bz_stream *bzs = &_bzs;
   2225     int bzerror;
   2226 
   2227     if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
   2228         return NULL;
   2229     data = pdata.buf;
   2230     datasize = pdata.len;
   2231 
   2232     if (datasize == 0) {
   2233         PyBuffer_Release(&pdata);
   2234         return PyString_FromString("");
   2235     }
   2236 
   2237     ret = PyString_FromStringAndSize(NULL, bufsize);
   2238     if (!ret) {
   2239         PyBuffer_Release(&pdata);
   2240         return NULL;
   2241     }
   2242 
   2243     memset(bzs, 0, sizeof(bz_stream));
   2244 
   2245     bzs->next_in = data;
   2246     bzs->avail_in = datasize;
   2247     bzs->next_out = BUF(ret);
   2248     bzs->avail_out = bufsize;
   2249 
   2250     bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
   2251     if (bzerror != BZ_OK) {
   2252         Util_CatchBZ2Error(bzerror);
   2253         Py_DECREF(ret);
   2254         PyBuffer_Release(&pdata);
   2255         return NULL;
   2256     }
   2257 
   2258     for (;;) {
   2259         Py_BEGIN_ALLOW_THREADS
   2260         bzerror = BZ2_bzDecompress(bzs);
   2261         Py_END_ALLOW_THREADS
   2262         if (bzerror == BZ_STREAM_END) {
   2263             break;
   2264         } else if (bzerror != BZ_OK) {
   2265             BZ2_bzDecompressEnd(bzs);
   2266             Util_CatchBZ2Error(bzerror);
   2267             PyBuffer_Release(&pdata);
   2268             Py_DECREF(ret);
   2269             return NULL;
   2270         }
   2271         if (bzs->avail_in == 0) {
   2272             BZ2_bzDecompressEnd(bzs);
   2273             PyErr_SetString(PyExc_ValueError,
   2274                             "couldn't find end of stream");
   2275             PyBuffer_Release(&pdata);
   2276             Py_DECREF(ret);
   2277             return NULL;
   2278         }
   2279         if (bzs->avail_out == 0) {
   2280             bufsize = Util_NewBufferSize(bufsize);
   2281             if (_PyString_Resize(&ret, bufsize) < 0) {
   2282                 BZ2_bzDecompressEnd(bzs);
   2283                 PyBuffer_Release(&pdata);
   2284                 Py_DECREF(ret);
   2285                 return NULL;
   2286             }
   2287             bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
   2288             bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
   2289         }
   2290     }
   2291 
   2292     if (bzs->avail_out != 0)
   2293         _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
   2294     BZ2_bzDecompressEnd(bzs);
   2295     PyBuffer_Release(&pdata);
   2296 
   2297     return ret;
   2298 }
   2299 
   2300 static PyMethodDef bz2_methods[] = {
   2301     {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
   2302         bz2_compress__doc__},
   2303     {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
   2304         bz2_decompress__doc__},
   2305     {NULL,              NULL}           /* sentinel */
   2306 };
   2307 
   2308 /* ===================================================================== */
   2309 /* Initialization function. */
   2310 
   2311 PyDoc_STRVAR(bz2__doc__,
   2312 "The python bz2 module provides a comprehensive interface for\n\
   2313 the bz2 compression library. It implements a complete file\n\
   2314 interface, one shot (de)compression functions, and types for\n\
   2315 sequential (de)compression.\n\
   2316 ");
   2317 
   2318 PyMODINIT_FUNC
   2319 initbz2(void)
   2320 {
   2321     PyObject *m;
   2322 
   2323     if (PyType_Ready(&BZ2File_Type) < 0)
   2324         return;
   2325     if (PyType_Ready(&BZ2Comp_Type) < 0)
   2326         return;
   2327     if (PyType_Ready(&BZ2Decomp_Type) < 0)
   2328         return;
   2329 
   2330     m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
   2331     if (m == NULL)
   2332         return;
   2333 
   2334     PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
   2335 
   2336     Py_INCREF(&BZ2File_Type);
   2337     PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
   2338 
   2339     Py_INCREF(&BZ2Comp_Type);
   2340     PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
   2341 
   2342     Py_INCREF(&BZ2Decomp_Type);
   2343     PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
   2344 }
   2345