Home | History | Annotate | Download | only in simplejson
      1 #include "Python.h"
      2 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
      3 typedef int Py_ssize_t;
      4 #define PY_SSIZE_T_MAX INT_MAX
      5 #define PY_SSIZE_T_MIN INT_MIN
      6 #endif
      7 
      8 static Py_ssize_t
      9 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
     10 static PyObject *
     11 ascii_escape_unicode(PyObject *pystr);
     12 static PyObject *
     13 ascii_escape_str(PyObject *pystr);
     14 static PyObject *
     15 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
     16 void init_speedups(void);
     17 
     18 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"')
     19 
     20 #define MIN_EXPANSION 6
     21 #ifdef Py_UNICODE_WIDE
     22 #define MAX_EXPANSION (2 * MIN_EXPANSION)
     23 #else
     24 #define MAX_EXPANSION MIN_EXPANSION
     25 #endif
     26 
     27 static Py_ssize_t
     28 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
     29     Py_UNICODE x;
     30     output[chars++] = '\\';
     31     switch (c) {
     32         case '/': output[chars++] = (char)c; break;
     33         case '\\': output[chars++] = (char)c; break;
     34         case '"': output[chars++] = (char)c; break;
     35         case '\b': output[chars++] = 'b'; break;
     36         case '\f': output[chars++] = 'f'; break;
     37         case '\n': output[chars++] = 'n'; break;
     38         case '\r': output[chars++] = 'r'; break;
     39         case '\t': output[chars++] = 't'; break;
     40         default:
     41 #ifdef Py_UNICODE_WIDE
     42             if (c >= 0x10000) {
     43                 /* UTF-16 surrogate pair */
     44                 Py_UNICODE v = c - 0x10000;
     45                 c = 0xd800 | ((v >> 10) & 0x3ff);
     46                 output[chars++] = 'u';
     47                 x = (c & 0xf000) >> 12;
     48                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     49                 x = (c & 0x0f00) >> 8;
     50                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     51                 x = (c & 0x00f0) >> 4;
     52                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     53                 x = (c & 0x000f);
     54                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     55                 c = 0xdc00 | (v & 0x3ff);
     56                 output[chars++] = '\\';
     57             }
     58 #endif
     59             output[chars++] = 'u';
     60             x = (c & 0xf000) >> 12;
     61             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     62             x = (c & 0x0f00) >> 8;
     63             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     64             x = (c & 0x00f0) >> 4;
     65             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     66             x = (c & 0x000f);
     67             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
     68     }
     69     return chars;
     70 }
     71 
     72 static PyObject *
     73 ascii_escape_unicode(PyObject *pystr) {
     74     Py_ssize_t i;
     75     Py_ssize_t input_chars;
     76     Py_ssize_t output_size;
     77     Py_ssize_t chars;
     78     PyObject *rval;
     79     char *output;
     80     Py_UNICODE *input_unicode;
     81 
     82     input_chars = PyUnicode_GET_SIZE(pystr);
     83     input_unicode = PyUnicode_AS_UNICODE(pystr);
     84     /* One char input can be up to 6 chars output, estimate 4 of these */
     85     output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
     86     rval = PyString_FromStringAndSize(NULL, output_size);
     87     if (rval == NULL) {
     88         return NULL;
     89     }
     90     output = PyString_AS_STRING(rval);
     91     chars = 0;
     92     output[chars++] = '"';
     93     for (i = 0; i < input_chars; i++) {
     94         Py_UNICODE c = input_unicode[i];
     95         if (S_CHAR(c)) {
     96             output[chars++] = (char)c;
     97         } else {
     98             chars = ascii_escape_char(c, output, chars);
     99         }
    100         if (output_size - chars < (1 + MAX_EXPANSION)) {
    101             /* There's more than four, so let's resize by a lot */
    102             output_size *= 2;
    103             /* This is an upper bound */
    104             if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
    105                 output_size = 2 + (input_chars * MAX_EXPANSION);
    106             }
    107             if (_PyString_Resize(&rval, output_size) == -1) {
    108                 return NULL;
    109             }
    110             output = PyString_AS_STRING(rval);
    111         }
    112     }
    113     output[chars++] = '"';
    114     if (_PyString_Resize(&rval, chars) == -1) {
    115         return NULL;
    116     }
    117     return rval;
    118 }
    119 
    120 static PyObject *
    121 ascii_escape_str(PyObject *pystr) {
    122     Py_ssize_t i;
    123     Py_ssize_t input_chars;
    124     Py_ssize_t output_size;
    125     Py_ssize_t chars;
    126     PyObject *rval;
    127     char *output;
    128     char *input_str;
    129 
    130     input_chars = PyString_GET_SIZE(pystr);
    131     input_str = PyString_AS_STRING(pystr);
    132     /* One char input can be up to 6 chars output, estimate 4 of these */
    133     output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
    134     rval = PyString_FromStringAndSize(NULL, output_size);
    135     if (rval == NULL) {
    136         return NULL;
    137     }
    138     output = PyString_AS_STRING(rval);
    139     chars = 0;
    140     output[chars++] = '"';
    141     for (i = 0; i < input_chars; i++) {
    142         Py_UNICODE c = (Py_UNICODE)input_str[i];
    143         if (S_CHAR(c)) {
    144             output[chars++] = (char)c;
    145         } else if (c > 0x7F) {
    146             /* We hit a non-ASCII character, bail to unicode mode */
    147             PyObject *uni;
    148             Py_DECREF(rval);
    149             uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
    150             if (uni == NULL) {
    151                 return NULL;
    152             }
    153             rval = ascii_escape_unicode(uni);
    154             Py_DECREF(uni);
    155             return rval;
    156         } else {
    157             chars = ascii_escape_char(c, output, chars);
    158         }
    159         /* An ASCII char can't possibly expand to a surrogate! */
    160         if (output_size - chars < (1 + MIN_EXPANSION)) {
    161             /* There's more than four, so let's resize by a lot */
    162             output_size *= 2;
    163             if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
    164                 output_size = 2 + (input_chars * MIN_EXPANSION);
    165             }
    166             if (_PyString_Resize(&rval, output_size) == -1) {
    167                 return NULL;
    168             }
    169             output = PyString_AS_STRING(rval);
    170         }
    171     }
    172     output[chars++] = '"';
    173     if (_PyString_Resize(&rval, chars) == -1) {
    174         return NULL;
    175     }
    176     return rval;
    177 }
    178 
    179 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
    180     "encode_basestring_ascii(basestring) -> str\n"
    181     "\n"
    182     "..."
    183 );
    184 
    185 static PyObject *
    186 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
    187     /* METH_O */
    188     if (PyString_Check(pystr)) {
    189         return ascii_escape_str(pystr);
    190     } else if (PyUnicode_Check(pystr)) {
    191         return ascii_escape_unicode(pystr);
    192     }
    193     PyErr_SetString(PyExc_TypeError, "first argument must be a string");
    194     return NULL;
    195 }
    196 
    197 #define DEFN(n, k) \
    198     {  \
    199         #n, \
    200         (PyCFunction)py_ ##n, \
    201         k, \
    202         pydoc_ ##n \
    203     }
    204 static PyMethodDef speedups_methods[] = {
    205     DEFN(encode_basestring_ascii, METH_O),
    206     {}
    207 };
    208 #undef DEFN
    209 
    210 void
    211 init_speedups(void)
    212 {
    213     PyObject *m;
    214     m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
    215 }
    216