1 /* Accumulator struct implementation */ 2 3 #include "Python.h" 4 #include "accu.h" 5 6 static PyObject * 7 join_list_unicode(PyObject *lst) 8 { 9 /* return ''.join(lst) */ 10 PyObject *sep, *ret; 11 sep = PyUnicode_FromStringAndSize("", 0); 12 ret = PyUnicode_Join(sep, lst); 13 Py_DECREF(sep); 14 return ret; 15 } 16 17 int 18 _PyAccu_Init(_PyAccu *acc) 19 { 20 /* Lazily allocated */ 21 acc->large = NULL; 22 acc->small = PyList_New(0); 23 if (acc->small == NULL) 24 return -1; 25 return 0; 26 } 27 28 static int 29 flush_accumulator(_PyAccu *acc) 30 { 31 Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); 32 if (nsmall) { 33 int ret; 34 PyObject *joined; 35 if (acc->large == NULL) { 36 acc->large = PyList_New(0); 37 if (acc->large == NULL) 38 return -1; 39 } 40 joined = join_list_unicode(acc->small); 41 if (joined == NULL) 42 return -1; 43 if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { 44 Py_DECREF(joined); 45 return -1; 46 } 47 ret = PyList_Append(acc->large, joined); 48 Py_DECREF(joined); 49 return ret; 50 } 51 return 0; 52 } 53 54 int 55 _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode) 56 { 57 Py_ssize_t nsmall; 58 assert(PyUnicode_Check(unicode)); 59 60 if (PyList_Append(acc->small, unicode)) 61 return -1; 62 nsmall = PyList_GET_SIZE(acc->small); 63 /* Each item in a list of unicode objects has an overhead (in 64-bit 64 * builds) of: 65 * - 8 bytes for the list slot 66 * - 56 bytes for the header of the unicode object 67 * that is, 64 bytes. 100000 such objects waste more than 6 MiB 68 * compared to a single concatenated string. 69 */ 70 if (nsmall < 100000) 71 return 0; 72 return flush_accumulator(acc); 73 } 74 75 PyObject * 76 _PyAccu_FinishAsList(_PyAccu *acc) 77 { 78 int ret; 79 PyObject *res; 80 81 ret = flush_accumulator(acc); 82 Py_CLEAR(acc->small); 83 if (ret) { 84 Py_CLEAR(acc->large); 85 return NULL; 86 } 87 res = acc->large; 88 acc->large = NULL; 89 return res; 90 } 91 92 PyObject * 93 _PyAccu_Finish(_PyAccu *acc) 94 { 95 PyObject *list, *res; 96 if (acc->large == NULL) { 97 list = acc->small; 98 acc->small = NULL; 99 } 100 else { 101 list = _PyAccu_FinishAsList(acc); 102 if (!list) 103 return NULL; 104 } 105 res = join_list_unicode(list); 106 Py_DECREF(list); 107 return res; 108 } 109 110 void 111 _PyAccu_Destroy(_PyAccu *acc) 112 { 113 Py_CLEAR(acc->small); 114 Py_CLEAR(acc->large); 115 } 116