Home | History | Annotate | Download | only in Python
      1 #include "Python.h"
      2 #include "osdefs.h"
      3 #include <locale.h>
      4 
      5 #ifdef MS_WINDOWS
      6 #  include <malloc.h>
      7 #  include <windows.h>
      8 extern int winerror_to_errno(int);
      9 #endif
     10 
     11 #ifdef HAVE_LANGINFO_H
     12 #include <langinfo.h>
     13 #endif
     14 
     15 #ifdef HAVE_SYS_IOCTL_H
     16 #include <sys/ioctl.h>
     17 #endif
     18 
     19 #ifdef HAVE_FCNTL_H
     20 #include <fcntl.h>
     21 #endif /* HAVE_FCNTL_H */
     22 
     23 #ifdef O_CLOEXEC
     24 /* Does open() support the O_CLOEXEC flag? Possible values:
     25 
     26    -1: unknown
     27     0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
     28     1: open() supports O_CLOEXEC flag, close-on-exec is set
     29 
     30    The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
     31    and os.open(). */
     32 int _Py_open_cloexec_works = -1;
     33 #endif
     34 
     35 PyObject *
     36 _Py_device_encoding(int fd)
     37 {
     38 #if defined(MS_WINDOWS)
     39     UINT cp;
     40 #endif
     41     int valid;
     42     _Py_BEGIN_SUPPRESS_IPH
     43     valid = isatty(fd);
     44     _Py_END_SUPPRESS_IPH
     45     if (!valid)
     46         Py_RETURN_NONE;
     47 
     48 #if defined(MS_WINDOWS)
     49     if (fd == 0)
     50         cp = GetConsoleCP();
     51     else if (fd == 1 || fd == 2)
     52         cp = GetConsoleOutputCP();
     53     else
     54         cp = 0;
     55     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
     56        has no console */
     57     if (cp != 0)
     58         return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
     59 #elif defined(CODESET)
     60     {
     61         char *codeset = nl_langinfo(CODESET);
     62         if (codeset != NULL && codeset[0] != 0)
     63             return PyUnicode_FromString(codeset);
     64     }
     65 #endif
     66     Py_RETURN_NONE;
     67 }
     68 
     69 #if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
     70 
     71 #define USE_FORCE_ASCII
     72 
     73 extern int _Py_normalize_encoding(const char *, char *, size_t);
     74 
     75 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
     76    On these operating systems, nl_langinfo(CODESET) announces an alias of the
     77    ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
     78    ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
     79    locale.getpreferredencoding() codec. For example, if command line arguments
     80    are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
     81    UnicodeEncodeError instead of retrieving the original byte string.
     82 
     83    The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
     84    nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
     85    one byte in range 0x80-0xff can be decoded from the locale encoding. The
     86    workaround is also enabled on error, for example if getting the locale
     87    failed.
     88 
     89    Values of force_ascii:
     90 
       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii() and Py_DecodeLocale() uses
          decode_ascii()
     94        0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
     95           Py_DecodeLocale() uses mbstowcs()
     96       -1: unknown, need to call check_force_ascii() to get the value
     97 */
     98 static int force_ascii = -1;
     99 
    100 static int
    101 check_force_ascii(void)
    102 {
    103     char *loc;
    104 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
    105     char *codeset, **alias;
    106     char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    107     int is_ascii;
    108     unsigned int i;
    109     char* ascii_aliases[] = {
    110         "ascii",
    111         /* Aliases from Lib/encodings/aliases.py */
    112         "646",
    113         "ansi_x3.4_1968",
    114         "ansi_x3.4_1986",
    115         "ansi_x3_4_1968",
    116         "cp367",
    117         "csascii",
    118         "ibm367",
    119         "iso646_us",
    120         "iso_646.irv_1991",
    121         "iso_ir_6",
    122         "us",
    123         "us_ascii",
    124         NULL
    125     };
    126 #endif
    127 
    128     loc = setlocale(LC_CTYPE, NULL);
    129     if (loc == NULL)
    130         goto error;
    131     if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
    132         /* the LC_CTYPE locale is different than C */
    133         return 0;
    134     }
    135 
    136 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
    137     codeset = nl_langinfo(CODESET);
    138     if (!codeset || codeset[0] == '\0') {
    139         /* CODESET is not set or empty */
    140         goto error;
    141     }
    142     if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
    143         goto error;
    144 
    145     is_ascii = 0;
    146     for (alias=ascii_aliases; *alias != NULL; alias++) {
    147         if (strcmp(encoding, *alias) == 0) {
    148             is_ascii = 1;
    149             break;
    150         }
    151     }
    152     if (!is_ascii) {
    153         /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
    154         return 0;
    155     }
    156 
    157     for (i=0x80; i<0xff; i++) {
    158         unsigned char ch;
    159         wchar_t wch;
    160         size_t res;
    161 
    162         ch = (unsigned char)i;
    163         res = mbstowcs(&wch, (char*)&ch, 1);
    164         if (res != (size_t)-1) {
    165             /* decoding a non-ASCII character from the locale encoding succeed:
    166                the locale encoding is not ASCII, force ASCII */
    167             return 1;
    168         }
    169     }
    170     /* None of the bytes in the range 0x80-0xff can be decoded from the locale
    171        encoding: the locale encoding is really ASCII */
    172     return 0;
    173 #else
    174     /* nl_langinfo(CODESET) is not available: always force ASCII */
    175     return 1;
    176 #endif
    177 
    178 error:
    179     /* if an error occurred, force the ASCII encoding */
    180     return 1;
    181 }
    182 
    183 
    184 int
    185 _Py_GetForceASCII(void)
    186 {
    187     if (force_ascii == -1) {
    188         force_ascii = check_force_ascii();
    189     }
    190     return force_ascii;
    191 }
    192 
    193 
    194 void
    195 _Py_ResetForceASCII(void)
    196 {
    197     force_ascii = -1;
    198 }
    199 
    200 
    201 static int
    202 encode_ascii(const wchar_t *text, char **str,
    203              size_t *error_pos, const char **reason,
    204              int raw_malloc, int surrogateescape)
    205 {
    206     char *result = NULL, *out;
    207     size_t len, i;
    208     wchar_t ch;
    209 
    210     len = wcslen(text);
    211 
    212     /* +1 for NULL byte */
    213     if (raw_malloc) {
    214         result = PyMem_RawMalloc(len + 1);
    215     }
    216     else {
    217         result = PyMem_Malloc(len + 1);
    218     }
    219     if (result == NULL) {
    220         return -1;
    221     }
    222 
    223     out = result;
    224     for (i=0; i<len; i++) {
    225         ch = text[i];
    226 
    227         if (ch <= 0x7f) {
    228             /* ASCII character */
    229             *out++ = (char)ch;
    230         }
    231         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
    232             /* UTF-8b surrogate */
    233             *out++ = (char)(ch - 0xdc00);
    234         }
    235         else {
    236             if (raw_malloc) {
    237                 PyMem_RawFree(result);
    238             }
    239             else {
    240                 PyMem_Free(result);
    241             }
    242             if (error_pos != NULL) {
    243                 *error_pos = i;
    244             }
    245             if (reason) {
    246                 *reason = "encoding error";
    247             }
    248             return -2;
    249         }
    250     }
    251     *out = '\0';
    252     *str = result;
    253     return 0;
    254 }
    255 #else
    256 int
    257 _Py_GetForceASCII(void)
    258 {
    259     return 0;
    260 }
    261 
    262 void
    263 _Py_ResetForceASCII(void)
    264 {
    265     /* nothing to do */
    266 }
    267 #endif   /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
    268 
    269 
    270 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
    271 static int
    272 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
    273              const char **reason, int surrogateescape)
    274 {
    275     wchar_t *res;
    276     unsigned char *in;
    277     wchar_t *out;
    278     size_t argsize = strlen(arg) + 1;
    279 
    280     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
    281         return -1;
    282     }
    283     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
    284     if (!res) {
    285         return -1;
    286     }
    287 
    288     out = res;
    289     for (in = (unsigned char*)arg; *in; in++) {
    290         unsigned char ch = *in;
    291         if (ch < 128) {
    292             *out++ = ch;
    293         }
    294         else {
    295             if (!surrogateescape) {
    296                 PyMem_RawFree(res);
    297                 if (wlen) {
    298                     *wlen = in - (unsigned char*)arg;
    299                 }
    300                 if (reason) {
    301                     *reason = "decoding error";
    302                 }
    303                 return -2;
    304             }
    305             *out++ = 0xdc00 + ch;
    306         }
    307     }
    308     *out = 0;
    309 
    310     if (wlen != NULL) {
    311         *wlen = out - res;
    312     }
    313     *wstr = res;
    314     return 0;
    315 }
    316 #endif   /* !HAVE_MBRTOWC */
    317 
    318 static int
    319 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
    320                       const char **reason, int surrogateescape)
    321 {
    322     wchar_t *res;
    323     size_t argsize;
    324     size_t count;
    325 #ifdef HAVE_MBRTOWC
    326     unsigned char *in;
    327     wchar_t *out;
    328     mbstate_t mbs;
    329 #endif
    330 
    331 #ifdef HAVE_BROKEN_MBSTOWCS
    332     /* Some platforms have a broken implementation of
    333      * mbstowcs which does not count the characters that
    334      * would result from conversion.  Use an upper bound.
    335      */
    336     argsize = strlen(arg);
    337 #else
    338     argsize = mbstowcs(NULL, arg, 0);
    339 #endif
    340     if (argsize != (size_t)-1) {
    341         if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
    342             return -1;
    343         }
    344         res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
    345         if (!res) {
    346             return -1;
    347         }
    348 
    349         count = mbstowcs(res, arg, argsize + 1);
    350         if (count != (size_t)-1) {
    351             wchar_t *tmp;
    352             /* Only use the result if it contains no
    353                surrogate characters. */
    354             for (tmp = res; *tmp != 0 &&
    355                          !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
    356                 ;
    357             if (*tmp == 0) {
    358                 if (wlen != NULL) {
    359                     *wlen = count;
    360                 }
    361                 *wstr = res;
    362                 return 0;
    363             }
    364         }
    365         PyMem_RawFree(res);
    366     }
    367 
    368     /* Conversion failed. Fall back to escaping with surrogateescape. */
    369 #ifdef HAVE_MBRTOWC
    370     /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
    371 
    372     /* Overallocate; as multi-byte characters are in the argument, the
    373        actual output could use less memory. */
    374     argsize = strlen(arg) + 1;
    375     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
    376         return -1;
    377     }
    378     res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
    379     if (!res) {
    380         return -1;
    381     }
    382 
    383     in = (unsigned char*)arg;
    384     out = res;
    385     memset(&mbs, 0, sizeof mbs);
    386     while (argsize) {
    387         size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
    388         if (converted == 0) {
    389             /* Reached end of string; null char stored. */
    390             break;
    391         }
    392 
    393         if (converted == (size_t)-2) {
    394             /* Incomplete character. This should never happen,
    395                since we provide everything that we have -
    396                unless there is a bug in the C library, or I
    397                misunderstood how mbrtowc works. */
    398             goto decode_error;
    399         }
    400 
    401         if (converted == (size_t)-1) {
    402             if (!surrogateescape) {
    403                 goto decode_error;
    404             }
    405 
    406             /* Conversion error. Escape as UTF-8b, and start over
    407                in the initial shift state. */
    408             *out++ = 0xdc00 + *in++;
    409             argsize--;
    410             memset(&mbs, 0, sizeof mbs);
    411             continue;
    412         }
    413 
    414         if (Py_UNICODE_IS_SURROGATE(*out)) {
    415             if (!surrogateescape) {
    416                 goto decode_error;
    417             }
    418 
    419             /* Surrogate character.  Escape the original
    420                byte sequence with surrogateescape. */
    421             argsize -= converted;
    422             while (converted--) {
    423                 *out++ = 0xdc00 + *in++;
    424             }
    425             continue;
    426         }
    427         /* successfully converted some bytes */
    428         in += converted;
    429         argsize -= converted;
    430         out++;
    431     }
    432     if (wlen != NULL) {
    433         *wlen = out - res;
    434     }
    435     *wstr = res;
    436     return 0;
    437 
    438 decode_error:
    439     PyMem_RawFree(res);
    440     if (wlen) {
    441         *wlen = in - (unsigned char*)arg;
    442     }
    443     if (reason) {
    444         *reason = "decoding error";
    445     }
    446     return -2;
    447 #else   /* HAVE_MBRTOWC */
    448     /* Cannot use C locale for escaping; manually escape as if charset
    449        is ASCII (i.e. escape all bytes > 128. This will still roundtrip
    450        correctly in the locale's charset, which must be an ASCII superset. */
    451     return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
    452 #endif   /* HAVE_MBRTOWC */
    453 }
    454 
    455 
    456 /* Decode a byte string from the locale encoding.
    457 
    458    Use the strict error handler if 'surrogateescape' is zero.  Use the
    459    surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
    460    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
    461    can be decoded as a surrogate character, escape the bytes using the
    462    surrogateescape error handler instead of decoding them.
    463 
    464    On success, return 0 and write the newly allocated wide character string into
    465    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
    466    the number of wide characters excluding the null character into *wlen.
    467 
    468    On memory allocation failure, return -1.
    469 
    470    On decoding error, return -2. If wlen is not NULL, write the start of
    471    invalid byte sequence in the input string into *wlen. If reason is not NULL,
    472    write the decoding error message into *reason.
    473 
    474    Use the Py_EncodeLocaleEx() function to encode the character string back to
    475    a byte string. */
    476 int
    477 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
    478                    const char **reason,
    479                    int current_locale, int surrogateescape)
    480 {
    481     if (current_locale) {
    482 #ifdef __ANDROID__
    483         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
    484                                 surrogateescape);
    485 #else
    486         return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
    487 #endif
    488     }
    489 
    490 #if defined(__APPLE__) || defined(__ANDROID__)
    491     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
    492                             surrogateescape);
    493 #else
    494     if (Py_UTF8Mode == 1) {
    495         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
    496                                 surrogateescape);
    497     }
    498 
    499 #ifdef USE_FORCE_ASCII
    500     if (force_ascii == -1) {
    501         force_ascii = check_force_ascii();
    502     }
    503 
    504     if (force_ascii) {
    505         /* force ASCII encoding to workaround mbstowcs() issue */
    506         return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
    507     }
    508 #endif
    509 
    510     return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
    511 #endif   /* __APPLE__ or __ANDROID__ */
    512 }
    513 
    514 
    515 /* Decode a byte string from the locale encoding with the
    516    surrogateescape error handler: undecodable bytes are decoded as characters
    517    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
    518    character, escape the bytes using the surrogateescape error handler instead
    519    of decoding them.
    520 
    521    Return a pointer to a newly allocated wide character string, use
    522    PyMem_RawFree() to free the memory. If size is not NULL, write the number of
    523    wide characters excluding the null character into *size
    524 
    525    Return NULL on decoding error or memory allocation error. If *size* is not
    526    NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
    527    decoding error.
    528 
    529    Decoding errors should never happen, unless there is a bug in the C
    530    library.
    531 
    532    Use the Py_EncodeLocale() function to encode the character string back to a
    533    byte string. */
    534 wchar_t*
    535 Py_DecodeLocale(const char* arg, size_t *wlen)
    536 {
    537     wchar_t *wstr;
    538     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen, NULL, 0, 1);
    539     if (res != 0) {
    540         if (wlen != NULL) {
    541             *wlen = (size_t)res;
    542         }
    543         return NULL;
    544     }
    545     return wstr;
    546 }
    547 
    548 
    549 static int
    550 encode_current_locale(const wchar_t *text, char **str,
    551                       size_t *error_pos, const char **reason,
    552                       int raw_malloc, int surrogateescape)
    553 {
    554     const size_t len = wcslen(text);
    555     char *result = NULL, *bytes = NULL;
    556     size_t i, size, converted;
    557     wchar_t c, buf[2];
    558 
    559     /* The function works in two steps:
    560        1. compute the length of the output buffer in bytes (size)
    561        2. outputs the bytes */
    562     size = 0;
    563     buf[1] = 0;
    564     while (1) {
    565         for (i=0; i < len; i++) {
    566             c = text[i];
    567             if (c >= 0xdc80 && c <= 0xdcff) {
    568                 if (!surrogateescape) {
    569                     goto encode_error;
    570                 }
    571                 /* UTF-8b surrogate */
    572                 if (bytes != NULL) {
    573                     *bytes++ = c - 0xdc00;
    574                     size--;
    575                 }
    576                 else {
    577                     size++;
    578                 }
    579                 continue;
    580             }
    581             else {
    582                 buf[0] = c;
    583                 if (bytes != NULL) {
    584                     converted = wcstombs(bytes, buf, size);
    585                 }
    586                 else {
    587                     converted = wcstombs(NULL, buf, 0);
    588                 }
    589                 if (converted == (size_t)-1) {
    590                     goto encode_error;
    591                 }
    592                 if (bytes != NULL) {
    593                     bytes += converted;
    594                     size -= converted;
    595                 }
    596                 else {
    597                     size += converted;
    598                 }
    599             }
    600         }
    601         if (result != NULL) {
    602             *bytes = '\0';
    603             break;
    604         }
    605 
    606         size += 1; /* nul byte at the end */
    607         if (raw_malloc) {
    608             result = PyMem_RawMalloc(size);
    609         }
    610         else {
    611             result = PyMem_Malloc(size);
    612         }
    613         if (result == NULL) {
    614             return -1;
    615         }
    616         bytes = result;
    617     }
    618     *str = result;
    619     return 0;
    620 
    621 encode_error:
    622     if (raw_malloc) {
    623         PyMem_RawFree(result);
    624     }
    625     else {
    626         PyMem_Free(result);
    627     }
    628     if (error_pos != NULL) {
    629         *error_pos = i;
    630     }
    631     if (reason) {
    632         *reason = "encoding error";
    633     }
    634     return -2;
    635 }
    636 
    637 static int
    638 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
    639                  const char **reason,
    640                  int raw_malloc, int current_locale, int surrogateescape)
    641 {
    642     if (current_locale) {
    643 #ifdef __ANDROID__
    644         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
    645                                 raw_malloc, surrogateescape);
    646 #else
    647         return encode_current_locale(text, str, error_pos, reason,
    648                                      raw_malloc, surrogateescape);
    649 #endif
    650     }
    651 
    652 #if defined(__APPLE__) || defined(__ANDROID__)
    653     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
    654                             raw_malloc, surrogateescape);
    655 #else   /* __APPLE__ */
    656     if (Py_UTF8Mode == 1) {
    657         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
    658                                 raw_malloc, surrogateescape);
    659     }
    660 
    661 #ifdef USE_FORCE_ASCII
    662     if (force_ascii == -1) {
    663         force_ascii = check_force_ascii();
    664     }
    665 
    666     if (force_ascii) {
    667         return encode_ascii(text, str, error_pos, reason,
    668                             raw_malloc, surrogateescape);
    669     }
    670 #endif
    671 
    672     return encode_current_locale(text, str, error_pos, reason,
    673                                  raw_malloc, surrogateescape);
    674 #endif   /* __APPLE__ or __ANDROID__ */
    675 }
    676 
    677 static char*
    678 encode_locale(const wchar_t *text, size_t *error_pos,
    679               int raw_malloc, int current_locale)
    680 {
    681     char *str;
    682     int res = encode_locale_ex(text, &str, error_pos, NULL,
    683                                raw_malloc, current_locale, 1);
    684     if (res != -2 && error_pos) {
    685         *error_pos = (size_t)-1;
    686     }
    687     if (res != 0) {
    688         return NULL;
    689     }
    690     return str;
    691 }
    692 
    693 /* Encode a wide character string to the locale encoding with the
    694    surrogateescape error handler: surrogate characters in the range
    695    U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
    696 
    697    Return a pointer to a newly allocated byte string, use PyMem_Free() to free
    698    the memory. Return NULL on encoding or memory allocation error.
    699 
    700    If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
    701    to the index of the invalid character on encoding error.
    702 
    703    Use the Py_DecodeLocale() function to decode the bytes string back to a wide
    704    character string. */
    705 char*
    706 Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
    707 {
    708     return encode_locale(text, error_pos, 0, 0);
    709 }
    710 
    711 
    712 /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
    713    instead of PyMem_Free(). */
    714 char*
    715 _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
    716 {
    717     return encode_locale(text, error_pos, 1, 0);
    718 }
    719 
    720 
    721 int
    722 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
    723                    size_t *error_pos, const char **reason,
    724                    int current_locale, int surrogateescape)
    725 {
    726     return encode_locale_ex(text, str, error_pos, reason, 1,
    727                             current_locale, surrogateescape);
    728 }
    729 
    730 
    731 #ifdef MS_WINDOWS
    732 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
    733 
    734 static void
    735 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
    736 {
    737     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
    738     /* Cannot simply cast and dereference in_ptr,
    739        since it might not be aligned properly */
    740     __int64 in;
    741     memcpy(&in, in_ptr, sizeof(in));
    742     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
    743     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
    744 }
    745 
    746 void
    747 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
    748 {
    749     /* XXX endianness */
    750     __int64 out;
    751     out = time_in + secs_between_epochs;
    752     out = out * 10000000 + nsec_in / 100;
    753     memcpy(out_ptr, &out, sizeof(out));
    754 }
    755 
    756 /* Below, we *know* that ugo+r is 0444 */
    757 #if _S_IREAD != 0400
    758 #error Unsupported C library
    759 #endif
    760 static int
    761 attributes_to_mode(DWORD attr)
    762 {
    763     int m = 0;
    764     if (attr & FILE_ATTRIBUTE_DIRECTORY)
    765         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
    766     else
    767         m |= _S_IFREG;
    768     if (attr & FILE_ATTRIBUTE_READONLY)
    769         m |= 0444;
    770     else
    771         m |= 0666;
    772     return m;
    773 }
    774 
    775 void
    776 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
    777                            struct _Py_stat_struct *result)
    778 {
    779     memset(result, 0, sizeof(*result));
    780     result->st_mode = attributes_to_mode(info->dwFileAttributes);
    781     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
    782     result->st_dev = info->dwVolumeSerialNumber;
    783     result->st_rdev = result->st_dev;
    784     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
    785     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
    786     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
    787     result->st_nlink = info->nNumberOfLinks;
    788     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
    789     if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
    790         /* first clear the S_IFMT bits */
    791         result->st_mode ^= (result->st_mode & S_IFMT);
    792         /* now set the bits that make this a symlink */
    793         result->st_mode |= S_IFLNK;
    794     }
    795     result->st_file_attributes = info->dwFileAttributes;
    796 }
    797 #endif
    798 
    799 /* Return information about a file.
    800 
    801    On POSIX, use fstat().
    802 
    803    On Windows, use GetFileType() and GetFileInformationByHandle() which support
    804    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
    805    than 2 GiB because the file size type is a signed 32-bit integer: see issue
    806    #23152.
    807 
    808    On Windows, set the last Windows error and return nonzero on error. On
    809    POSIX, set errno and return nonzero on error. Fill status and return 0 on
    810    success. */
    811 int
    812 _Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
    813 {
    814 #ifdef MS_WINDOWS
    815     BY_HANDLE_FILE_INFORMATION info;
    816     HANDLE h;
    817     int type;
    818 
    819     _Py_BEGIN_SUPPRESS_IPH
    820     h = (HANDLE)_get_osfhandle(fd);
    821     _Py_END_SUPPRESS_IPH
    822 
    823     if (h == INVALID_HANDLE_VALUE) {
    824         /* errno is already set by _get_osfhandle, but we also set
    825            the Win32 error for callers who expect that */
    826         SetLastError(ERROR_INVALID_HANDLE);
    827         return -1;
    828     }
    829     memset(status, 0, sizeof(*status));
    830 
    831     type = GetFileType(h);
    832     if (type == FILE_TYPE_UNKNOWN) {
    833         DWORD error = GetLastError();
    834         if (error != 0) {
    835             errno = winerror_to_errno(error);
    836             return -1;
    837         }
    838         /* else: valid but unknown file */
    839     }
    840 
    841     if (type != FILE_TYPE_DISK) {
    842         if (type == FILE_TYPE_CHAR)
    843             status->st_mode = _S_IFCHR;
    844         else if (type == FILE_TYPE_PIPE)
    845             status->st_mode = _S_IFIFO;
    846         return 0;
    847     }
    848 
    849     if (!GetFileInformationByHandle(h, &info)) {
    850         /* The Win32 error is already set, but we also set errno for
    851            callers who expect it */
    852         errno = winerror_to_errno(GetLastError());
    853         return -1;
    854     }
    855 
    856     _Py_attribute_data_to_stat(&info, 0, status);
    857     /* specific to fstat() */
    858     status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    859     return 0;
    860 #else
    861     return fstat(fd, status);
    862 #endif
    863 }
    864 
    865 /* Return information about a file.
    866 
    867    On POSIX, use fstat().
    868 
    869    On Windows, use GetFileType() and GetFileInformationByHandle() which support
    870    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
    871    than 2 GiB because the file size type is a signed 32-bit integer: see issue
    872    #23152.
    873 
    874    Raise an exception and return -1 on error. On Windows, set the last Windows
    875    error on error. On POSIX, set errno on error. Fill status and return 0 on
    876    success.
    877 
    878    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
    879    to call fstat(). The caller must hold the GIL. */
    880 int
    881 _Py_fstat(int fd, struct _Py_stat_struct *status)
    882 {
    883     int res;
    884 
    885     assert(PyGILState_Check());
    886 
    887     Py_BEGIN_ALLOW_THREADS
    888     res = _Py_fstat_noraise(fd, status);
    889     Py_END_ALLOW_THREADS
    890 
    891     if (res != 0) {
    892 #ifdef MS_WINDOWS
    893         PyErr_SetFromWindowsErr(0);
    894 #else
    895         PyErr_SetFromErrno(PyExc_OSError);
    896 #endif
    897         return -1;
    898     }
    899     return 0;
    900 }
    901 
    902 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
    903    call stat() otherwise. Only fill st_mode attribute on Windows.
    904 
    905    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
    906    raised. */
    907 
    908 int
    909 _Py_stat(PyObject *path, struct stat *statbuf)
    910 {
    911 #ifdef MS_WINDOWS
    912     int err;
    913     struct _stat wstatbuf;
    914     const wchar_t *wpath;
    915 
    916     wpath = _PyUnicode_AsUnicode(path);
    917     if (wpath == NULL)
    918         return -2;
    919 
    920     err = _wstat(wpath, &wstatbuf);
    921     if (!err)
    922         statbuf->st_mode = wstatbuf.st_mode;
    923     return err;
    924 #else
    925     int ret;
    926     PyObject *bytes;
    927     char *cpath;
    928 
    929     bytes = PyUnicode_EncodeFSDefault(path);
    930     if (bytes == NULL)
    931         return -2;
    932 
    933     /* check for embedded null bytes */
    934     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
    935         Py_DECREF(bytes);
    936         return -2;
    937     }
    938 
    939     ret = stat(cpath, statbuf);
    940     Py_DECREF(bytes);
    941     return ret;
    942 #endif
    943 }
    944 
    945 
    946 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
    947 static int
    948 get_inheritable(int fd, int raise)
    949 {
    950 #ifdef MS_WINDOWS
    951     HANDLE handle;
    952     DWORD flags;
    953 
    954     _Py_BEGIN_SUPPRESS_IPH
    955     handle = (HANDLE)_get_osfhandle(fd);
    956     _Py_END_SUPPRESS_IPH
    957     if (handle == INVALID_HANDLE_VALUE) {
    958         if (raise)
    959             PyErr_SetFromErrno(PyExc_OSError);
    960         return -1;
    961     }
    962 
    963     if (!GetHandleInformation(handle, &flags)) {
    964         if (raise)
    965             PyErr_SetFromWindowsErr(0);
    966         return -1;
    967     }
    968 
    969     return (flags & HANDLE_FLAG_INHERIT);
    970 #else
    971     int flags;
    972 
    973     flags = fcntl(fd, F_GETFD, 0);
    974     if (flags == -1) {
    975         if (raise)
    976             PyErr_SetFromErrno(PyExc_OSError);
    977         return -1;
    978     }
    979     return !(flags & FD_CLOEXEC);
    980 #endif
    981 }
    982 
    983 /* Get the inheritable flag of the specified file descriptor.
    984    Return 1 if the file descriptor can be inherited, 0 if it cannot,
    985    raise an exception and return -1 on error. */
    986 int
    987 _Py_get_inheritable(int fd)
    988 {
    989     return get_inheritable(fd, 1);
    990 }
    991 
    992 
    993 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
    994 static int
    995 set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
    996 {
    997 #ifdef MS_WINDOWS
    998     HANDLE handle;
    999     DWORD flags;
   1000 #else
   1001 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
   1002     static int ioctl_works = -1;
   1003     int request;
   1004     int err;
   1005 #endif
   1006     int flags, new_flags;
   1007     int res;
   1008 #endif
   1009 
   1010     /* atomic_flag_works can only be used to make the file descriptor
   1011        non-inheritable */
   1012     assert(!(atomic_flag_works != NULL && inheritable));
   1013 
   1014     if (atomic_flag_works != NULL && !inheritable) {
   1015         if (*atomic_flag_works == -1) {
   1016             int isInheritable = get_inheritable(fd, raise);
   1017             if (isInheritable == -1)
   1018                 return -1;
   1019             *atomic_flag_works = !isInheritable;
   1020         }
   1021 
   1022         if (*atomic_flag_works)
   1023             return 0;
   1024     }
   1025 
   1026 #ifdef MS_WINDOWS
   1027     _Py_BEGIN_SUPPRESS_IPH
   1028     handle = (HANDLE)_get_osfhandle(fd);
   1029     _Py_END_SUPPRESS_IPH
   1030     if (handle == INVALID_HANDLE_VALUE) {
   1031         if (raise)
   1032             PyErr_SetFromErrno(PyExc_OSError);
   1033         return -1;
   1034     }
   1035 
   1036     if (inheritable)
   1037         flags = HANDLE_FLAG_INHERIT;
   1038     else
   1039         flags = 0;
   1040     if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
   1041         if (raise)
   1042             PyErr_SetFromWindowsErr(0);
   1043         return -1;
   1044     }
   1045     return 0;
   1046 
   1047 #else
   1048 
   1049 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
   1050     if (ioctl_works != 0 && raise != 0) {
   1051         /* fast-path: ioctl() only requires one syscall */
   1052         /* caveat: raise=0 is an indicator that we must be async-signal-safe
   1053          * thus avoid using ioctl() so we skip the fast-path. */
   1054         if (inheritable)
   1055             request = FIONCLEX;
   1056         else
   1057             request = FIOCLEX;
   1058         err = ioctl(fd, request, NULL);
   1059         if (!err) {
   1060             ioctl_works = 1;
   1061             return 0;
   1062         }
   1063 
   1064         if (errno != ENOTTY && errno != EACCES) {
   1065             if (raise)
   1066                 PyErr_SetFromErrno(PyExc_OSError);
   1067             return -1;
   1068         }
   1069         else {
   1070             /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
   1071                device". The ioctl is declared but not supported by the kernel.
   1072                Remember that ioctl() doesn't work. It is the case on
   1073                Illumos-based OS for example.
   1074 
   1075                Issue #27057: When SELinux policy disallows ioctl it will fail
   1076                with EACCES. While FIOCLEX is safe operation it may be
   1077                unavailable because ioctl was denied altogether.
   1078                This can be the case on Android. */
   1079             ioctl_works = 0;
   1080         }
   1081         /* fallback to fcntl() if ioctl() does not work */
   1082     }
   1083 #endif
   1084 
   1085     /* slow-path: fcntl() requires two syscalls */
   1086     flags = fcntl(fd, F_GETFD);
   1087     if (flags < 0) {
   1088         if (raise)
   1089             PyErr_SetFromErrno(PyExc_OSError);
   1090         return -1;
   1091     }
   1092 
   1093     if (inheritable) {
   1094         new_flags = flags & ~FD_CLOEXEC;
   1095     }
   1096     else {
   1097         new_flags = flags | FD_CLOEXEC;
   1098     }
   1099 
   1100     if (new_flags == flags) {
   1101         /* FD_CLOEXEC flag already set/cleared: nothing to do */
   1102         return 0;
   1103     }
   1104 
   1105     res = fcntl(fd, F_SETFD, new_flags);
   1106     if (res < 0) {
   1107         if (raise)
   1108             PyErr_SetFromErrno(PyExc_OSError);
   1109         return -1;
   1110     }
   1111     return 0;
   1112 #endif
   1113 }
   1114 
   1115 /* Make the file descriptor non-inheritable.
   1116    Return 0 on success, set errno and return -1 on error. */
   1117 static int
   1118 make_non_inheritable(int fd)
   1119 {
   1120     return set_inheritable(fd, 0, 0, NULL);
   1121 }
   1122 
   1123 /* Set the inheritable flag of the specified file descriptor.
   1124    On success: return 0, on error: raise an exception and return -1.
   1125 
   1126    If atomic_flag_works is not NULL:
   1127 
   1128     * if *atomic_flag_works==-1, check if the inheritable is set on the file
   1129       descriptor: if yes, set *atomic_flag_works to 1, otherwise set to 0 and
   1130       set the inheritable flag
   1131     * if *atomic_flag_works==1: do nothing
   1132     * if *atomic_flag_works==0: set inheritable flag to False
   1133 
   1134    Set atomic_flag_works to NULL if no atomic flag was used to create the
   1135    file descriptor.
   1136 
   1137    atomic_flag_works can only be used to make a file descriptor
   1138    non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
   1139 int
   1140 _Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
   1141 {
   1142     return set_inheritable(fd, inheritable, 1, atomic_flag_works);
   1143 }
   1144 
   1145 /* Same as _Py_set_inheritable() but on error, set errno and
   1146    don't raise an exception.
   1147    This function is async-signal-safe. */
   1148 int
   1149 _Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
   1150 {
   1151     return set_inheritable(fd, inheritable, 0, atomic_flag_works);
   1152 }
   1153 
   1154 static int
   1155 _Py_open_impl(const char *pathname, int flags, int gil_held)
   1156 {
   1157     int fd;
   1158     int async_err = 0;
   1159 #ifndef MS_WINDOWS
   1160     int *atomic_flag_works;
   1161 #endif
   1162 
   1163 #ifdef MS_WINDOWS
   1164     flags |= O_NOINHERIT;
   1165 #elif defined(O_CLOEXEC)
   1166     atomic_flag_works = &_Py_open_cloexec_works;
   1167     flags |= O_CLOEXEC;
   1168 #else
   1169     atomic_flag_works = NULL;
   1170 #endif
   1171 
   1172     if (gil_held) {
   1173         do {
   1174             Py_BEGIN_ALLOW_THREADS
   1175             fd = open(pathname, flags);
   1176             Py_END_ALLOW_THREADS
   1177         } while (fd < 0
   1178                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));
   1179         if (async_err)
   1180             return -1;
   1181         if (fd < 0) {
   1182             PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
   1183             return -1;
   1184         }
   1185     }
   1186     else {
   1187         fd = open(pathname, flags);
   1188         if (fd < 0)
   1189             return -1;
   1190     }
   1191 
   1192 #ifndef MS_WINDOWS
   1193     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
   1194         close(fd);
   1195         return -1;
   1196     }
   1197 #endif
   1198 
   1199     return fd;
   1200 }
   1201 
   1202 /* Open a file with the specified flags (wrapper to open() function).
   1203    Return a file descriptor on success. Raise an exception and return -1 on
   1204    error.
   1205 
   1206    The file descriptor is created non-inheritable.
   1207 
   1208    When interrupted by a signal (open() fails with EINTR), retry the syscall,
   1209    except if the Python signal handler raises an exception.
   1210 
   1211    Release the GIL to call open(). The caller must hold the GIL. */
   1212 int
   1213 _Py_open(const char *pathname, int flags)
   1214 {
   1215     /* _Py_open() must be called with the GIL held. */
   1216     assert(PyGILState_Check());
   1217     return _Py_open_impl(pathname, flags, 1);
   1218 }
   1219 
   1220 /* Open a file with the specified flags (wrapper to open() function).
   1221    Return a file descriptor on success. Set errno and return -1 on error.
   1222 
   1223    The file descriptor is created non-inheritable.
   1224 
   1225    If interrupted by a signal, fail with EINTR. */
   1226 int
   1227 _Py_open_noraise(const char *pathname, int flags)
   1228 {
   1229     return _Py_open_impl(pathname, flags, 0);
   1230 }
   1231 
   1232 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
   1233    encoding and use fopen() otherwise.
   1234 
   1235    The file descriptor is created non-inheritable.
   1236 
   1237    If interrupted by a signal, fail with EINTR. */
   1238 FILE *
   1239 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
   1240 {
   1241     FILE *f;
   1242 #ifndef MS_WINDOWS
   1243     char *cpath;
   1244     char cmode[10];
   1245     size_t r;
   1246     r = wcstombs(cmode, mode, 10);
   1247     if (r == (size_t)-1 || r >= 10) {
   1248         errno = EINVAL;
   1249         return NULL;
   1250     }
   1251     cpath = _Py_EncodeLocaleRaw(path, NULL);
   1252     if (cpath == NULL) {
   1253         return NULL;
   1254     }
   1255     f = fopen(cpath, cmode);
   1256     PyMem_RawFree(cpath);
   1257 #else
   1258     f = _wfopen(path, mode);
   1259 #endif
   1260     if (f == NULL)
   1261         return NULL;
   1262     if (make_non_inheritable(fileno(f)) < 0) {
   1263         fclose(f);
   1264         return NULL;
   1265     }
   1266     return f;
   1267 }
   1268 
   1269 /* Wrapper to fopen().
   1270 
   1271    The file descriptor is created non-inheritable.
   1272 
   1273    If interrupted by a signal, fail with EINTR. */
   1274 FILE*
   1275 _Py_fopen(const char *pathname, const char *mode)
   1276 {
   1277     FILE *f = fopen(pathname, mode);
   1278     if (f == NULL)
   1279         return NULL;
   1280     if (make_non_inheritable(fileno(f)) < 0) {
   1281         fclose(f);
   1282         return NULL;
   1283     }
   1284     return f;
   1285 }
   1286 
   1287 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
   1288    encoding and call fopen() otherwise.
   1289 
   1290    Return the new file object on success. Raise an exception and return NULL
   1291    on error.
   1292 
   1293    The file descriptor is created non-inheritable.
   1294 
   1295    When interrupted by a signal (open() fails with EINTR), retry the syscall,
   1296    except if the Python signal handler raises an exception.
   1297 
   1298    Release the GIL to call _wfopen() or fopen(). The caller must hold
   1299    the GIL. */
   1300 FILE*
   1301 _Py_fopen_obj(PyObject *path, const char *mode)
   1302 {
   1303     FILE *f;
   1304     int async_err = 0;
   1305 #ifdef MS_WINDOWS
   1306     const wchar_t *wpath;
   1307     wchar_t wmode[10];
   1308     int usize;
   1309 
   1310     assert(PyGILState_Check());
   1311 
   1312     if (!PyUnicode_Check(path)) {
   1313         PyErr_Format(PyExc_TypeError,
   1314                      "str file path expected under Windows, got %R",
   1315                      Py_TYPE(path));
   1316         return NULL;
   1317     }
   1318     wpath = _PyUnicode_AsUnicode(path);
   1319     if (wpath == NULL)
   1320         return NULL;
   1321 
   1322     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
   1323                                 wmode, Py_ARRAY_LENGTH(wmode));
   1324     if (usize == 0) {
   1325         PyErr_SetFromWindowsErr(0);
   1326         return NULL;
   1327     }
   1328 
   1329     do {
   1330         Py_BEGIN_ALLOW_THREADS
   1331         f = _wfopen(wpath, wmode);
   1332         Py_END_ALLOW_THREADS
   1333     } while (f == NULL
   1334              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
   1335 #else
   1336     PyObject *bytes;
   1337     char *path_bytes;
   1338 
   1339     assert(PyGILState_Check());
   1340 
   1341     if (!PyUnicode_FSConverter(path, &bytes))
   1342         return NULL;
   1343     path_bytes = PyBytes_AS_STRING(bytes);
   1344 
   1345     do {
   1346         Py_BEGIN_ALLOW_THREADS
   1347         f = fopen(path_bytes, mode);
   1348         Py_END_ALLOW_THREADS
   1349     } while (f == NULL
   1350              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
   1351 
   1352     Py_DECREF(bytes);
   1353 #endif
   1354     if (async_err)
   1355         return NULL;
   1356 
   1357     if (f == NULL) {
   1358         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
   1359         return NULL;
   1360     }
   1361 
   1362     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
   1363         fclose(f);
   1364         return NULL;
   1365     }
   1366     return f;
   1367 }
   1368 
   1369 /* Read count bytes from fd into buf.
   1370 
   1371    On success, return the number of read bytes, it can be lower than count.
   1372    If the current file offset is at or past the end of file, no bytes are read,
   1373    and read() returns zero.
   1374 
   1375    On error, raise an exception, set errno and return -1.
   1376 
   1377    When interrupted by a signal (read() fails with EINTR), retry the syscall.
   1378    If the Python signal handler raises an exception, the function returns -1
   1379    (the syscall is not retried).
   1380 
   1381    Release the GIL to call read(). The caller must hold the GIL. */
   1382 Py_ssize_t
   1383 _Py_read(int fd, void *buf, size_t count)
   1384 {
   1385     Py_ssize_t n;
   1386     int err;
   1387     int async_err = 0;
   1388 
   1389     assert(PyGILState_Check());
   1390 
   1391     /* _Py_read() must not be called with an exception set, otherwise the
   1392      * caller may think that read() was interrupted by a signal and the signal
   1393      * handler raised an exception. */
   1394     assert(!PyErr_Occurred());
   1395 
   1396     if (count > _PY_READ_MAX) {
   1397         count = _PY_READ_MAX;
   1398     }
   1399 
   1400     _Py_BEGIN_SUPPRESS_IPH
   1401     do {
   1402         Py_BEGIN_ALLOW_THREADS
   1403         errno = 0;
   1404 #ifdef MS_WINDOWS
   1405         n = read(fd, buf, (int)count);
   1406 #else
   1407         n = read(fd, buf, count);
   1408 #endif
   1409         /* save/restore errno because PyErr_CheckSignals()
   1410          * and PyErr_SetFromErrno() can modify it */
   1411         err = errno;
   1412         Py_END_ALLOW_THREADS
   1413     } while (n < 0 && err == EINTR &&
   1414             !(async_err = PyErr_CheckSignals()));
   1415     _Py_END_SUPPRESS_IPH
   1416 
   1417     if (async_err) {
   1418         /* read() was interrupted by a signal (failed with EINTR)
   1419          * and the Python signal handler raised an exception */
   1420         errno = err;
   1421         assert(errno == EINTR && PyErr_Occurred());
   1422         return -1;
   1423     }
   1424     if (n < 0) {
   1425         PyErr_SetFromErrno(PyExc_OSError);
   1426         errno = err;
   1427         return -1;
   1428     }
   1429 
   1430     return n;
   1431 }
   1432 
   1433 static Py_ssize_t
   1434 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
   1435 {
   1436     Py_ssize_t n;
   1437     int err;
   1438     int async_err = 0;
   1439 
   1440     _Py_BEGIN_SUPPRESS_IPH
   1441 #ifdef MS_WINDOWS
   1442     if (count > 32767 && isatty(fd)) {
   1443         /* Issue #11395: the Windows console returns an error (12: not
   1444            enough space error) on writing into stdout if stdout mode is
   1445            binary and the length is greater than 66,000 bytes (or less,
   1446            depending on heap usage). */
   1447         count = 32767;
   1448     }
   1449 #endif
   1450     if (count > _PY_WRITE_MAX) {
   1451         count = _PY_WRITE_MAX;
   1452     }
   1453 
   1454     if (gil_held) {
   1455         do {
   1456             Py_BEGIN_ALLOW_THREADS
   1457             errno = 0;
   1458 #ifdef MS_WINDOWS
   1459             n = write(fd, buf, (int)count);
   1460 #else
   1461             n = write(fd, buf, count);
   1462 #endif
   1463             /* save/restore errno because PyErr_CheckSignals()
   1464              * and PyErr_SetFromErrno() can modify it */
   1465             err = errno;
   1466             Py_END_ALLOW_THREADS
   1467         } while (n < 0 && err == EINTR &&
   1468                 !(async_err = PyErr_CheckSignals()));
   1469     }
   1470     else {
   1471         do {
   1472             errno = 0;
   1473 #ifdef MS_WINDOWS
   1474             n = write(fd, buf, (int)count);
   1475 #else
   1476             n = write(fd, buf, count);
   1477 #endif
   1478             err = errno;
   1479         } while (n < 0 && err == EINTR);
   1480     }
   1481     _Py_END_SUPPRESS_IPH
   1482 
   1483     if (async_err) {
   1484         /* write() was interrupted by a signal (failed with EINTR)
   1485            and the Python signal handler raised an exception (if gil_held is
   1486            nonzero). */
   1487         errno = err;
   1488         assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
   1489         return -1;
   1490     }
   1491     if (n < 0) {
   1492         if (gil_held)
   1493             PyErr_SetFromErrno(PyExc_OSError);
   1494         errno = err;
   1495         return -1;
   1496     }
   1497 
   1498     return n;
   1499 }
   1500 
   1501 /* Write count bytes of buf into fd.
   1502 
   1503    On success, return the number of written bytes, it can be lower than count
   1504    including 0. On error, raise an exception, set errno and return -1.
   1505 
   1506    When interrupted by a signal (write() fails with EINTR), retry the syscall.
   1507    If the Python signal handler raises an exception, the function returns -1
   1508    (the syscall is not retried).
   1509 
   1510    Release the GIL to call write(). The caller must hold the GIL. */
   1511 Py_ssize_t
   1512 _Py_write(int fd, const void *buf, size_t count)
   1513 {
   1514     assert(PyGILState_Check());
   1515 
   1516     /* _Py_write() must not be called with an exception set, otherwise the
   1517      * caller may think that write() was interrupted by a signal and the signal
   1518      * handler raised an exception. */
   1519     assert(!PyErr_Occurred());
   1520 
   1521     return _Py_write_impl(fd, buf, count, 1);
   1522 }
   1523 
   1524 /* Write count bytes of buf into fd.
   1525  *
   1526  * On success, return the number of written bytes, it can be lower than count
   1527  * including 0. On error, set errno and return -1.
   1528  *
   1529  * When interrupted by a signal (write() fails with EINTR), retry the syscall
   1530  * without calling the Python signal handler. */
   1531 Py_ssize_t
   1532 _Py_write_noraise(int fd, const void *buf, size_t count)
   1533 {
   1534     return _Py_write_impl(fd, buf, count, 0);
   1535 }
   1536 
   1537 #ifdef HAVE_READLINK
   1538 
   1539 /* Read value of symbolic link. Encode the path to the locale encoding, decode
   1540    the result from the locale encoding. Return -1 on error. */
   1541 
   1542 int
   1543 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
   1544 {
   1545     char *cpath;
   1546     char cbuf[MAXPATHLEN];
   1547     wchar_t *wbuf;
   1548     int res;
   1549     size_t r1;
   1550 
   1551     cpath = _Py_EncodeLocaleRaw(path, NULL);
   1552     if (cpath == NULL) {
   1553         errno = EINVAL;
   1554         return -1;
   1555     }
   1556     res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
   1557     PyMem_RawFree(cpath);
   1558     if (res == -1)
   1559         return -1;
   1560     if (res == Py_ARRAY_LENGTH(cbuf)) {
   1561         errno = EINVAL;
   1562         return -1;
   1563     }
   1564     cbuf[res] = '\0'; /* buf will be null terminated */
   1565     wbuf = Py_DecodeLocale(cbuf, &r1);
   1566     if (wbuf == NULL) {
   1567         errno = EINVAL;
   1568         return -1;
   1569     }
   1570     if (bufsiz <= r1) {
   1571         PyMem_RawFree(wbuf);
   1572         errno = EINVAL;
   1573         return -1;
   1574     }
   1575     wcsncpy(buf, wbuf, bufsiz);
   1576     PyMem_RawFree(wbuf);
   1577     return (int)r1;
   1578 }
   1579 #endif
   1580 
   1581 #ifdef HAVE_REALPATH
   1582 
   1583 /* Return the canonicalized absolute pathname. Encode path to the locale
   1584    encoding, decode the result from the locale encoding.
   1585    Return NULL on error. */
   1586 
   1587 wchar_t*
   1588 _Py_wrealpath(const wchar_t *path,
   1589               wchar_t *resolved_path, size_t resolved_path_size)
   1590 {
   1591     char *cpath;
   1592     char cresolved_path[MAXPATHLEN];
   1593     wchar_t *wresolved_path;
   1594     char *res;
   1595     size_t r;
   1596     cpath = _Py_EncodeLocaleRaw(path, NULL);
   1597     if (cpath == NULL) {
   1598         errno = EINVAL;
   1599         return NULL;
   1600     }
   1601     res = realpath(cpath, cresolved_path);
   1602     PyMem_RawFree(cpath);
   1603     if (res == NULL)
   1604         return NULL;
   1605 
   1606     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
   1607     if (wresolved_path == NULL) {
   1608         errno = EINVAL;
   1609         return NULL;
   1610     }
   1611     if (resolved_path_size <= r) {
   1612         PyMem_RawFree(wresolved_path);
   1613         errno = EINVAL;
   1614         return NULL;
   1615     }
   1616     wcsncpy(resolved_path, wresolved_path, resolved_path_size);
   1617     PyMem_RawFree(wresolved_path);
   1618     return resolved_path;
   1619 }
   1620 #endif
   1621 
   1622 /* Get the current directory. size is the buffer size in wide characters
   1623    including the null character. Decode the path from the locale encoding.
   1624    Return NULL on error. */
   1625 
   1626 wchar_t*
   1627 _Py_wgetcwd(wchar_t *buf, size_t size)
   1628 {
   1629 #ifdef MS_WINDOWS
   1630     int isize = (int)Py_MIN(size, INT_MAX);
   1631     return _wgetcwd(buf, isize);
   1632 #else
   1633     char fname[MAXPATHLEN];
   1634     wchar_t *wname;
   1635     size_t len;
   1636 
   1637     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
   1638         return NULL;
   1639     wname = Py_DecodeLocale(fname, &len);
   1640     if (wname == NULL)
   1641         return NULL;
   1642     if (size <= len) {
   1643         PyMem_RawFree(wname);
   1644         return NULL;
   1645     }
   1646     wcsncpy(buf, wname, size);
   1647     PyMem_RawFree(wname);
   1648     return buf;
   1649 #endif
   1650 }
   1651 
   1652 /* Duplicate a file descriptor. The new file descriptor is created as
   1653    non-inheritable. Return a new file descriptor on success, raise an OSError
   1654    exception and return -1 on error.
   1655 
   1656    The GIL is released to call dup(). The caller must hold the GIL. */
   1657 int
   1658 _Py_dup(int fd)
   1659 {
   1660 #ifdef MS_WINDOWS
   1661     HANDLE handle;
   1662     DWORD ftype;
   1663 #endif
   1664 
   1665     assert(PyGILState_Check());
   1666 
   1667 #ifdef MS_WINDOWS
   1668     _Py_BEGIN_SUPPRESS_IPH
   1669     handle = (HANDLE)_get_osfhandle(fd);
   1670     _Py_END_SUPPRESS_IPH
   1671     if (handle == INVALID_HANDLE_VALUE) {
   1672         PyErr_SetFromErrno(PyExc_OSError);
   1673         return -1;
   1674     }
   1675 
   1676     /* get the file type, ignore the error if it failed */
   1677     ftype = GetFileType(handle);
   1678 
   1679     Py_BEGIN_ALLOW_THREADS
   1680     _Py_BEGIN_SUPPRESS_IPH
   1681     fd = dup(fd);
   1682     _Py_END_SUPPRESS_IPH
   1683     Py_END_ALLOW_THREADS
   1684     if (fd < 0) {
   1685         PyErr_SetFromErrno(PyExc_OSError);
   1686         return -1;
   1687     }
   1688 
   1689     /* Character files like console cannot be make non-inheritable */
   1690     if (ftype != FILE_TYPE_CHAR) {
   1691         if (_Py_set_inheritable(fd, 0, NULL) < 0) {
   1692             _Py_BEGIN_SUPPRESS_IPH
   1693             close(fd);
   1694             _Py_END_SUPPRESS_IPH
   1695             return -1;
   1696         }
   1697     }
   1698 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
   1699     Py_BEGIN_ALLOW_THREADS
   1700     _Py_BEGIN_SUPPRESS_IPH
   1701     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
   1702     _Py_END_SUPPRESS_IPH
   1703     Py_END_ALLOW_THREADS
   1704     if (fd < 0) {
   1705         PyErr_SetFromErrno(PyExc_OSError);
   1706         return -1;
   1707     }
   1708 
   1709 #else
   1710     Py_BEGIN_ALLOW_THREADS
   1711     _Py_BEGIN_SUPPRESS_IPH
   1712     fd = dup(fd);
   1713     _Py_END_SUPPRESS_IPH
   1714     Py_END_ALLOW_THREADS
   1715     if (fd < 0) {
   1716         PyErr_SetFromErrno(PyExc_OSError);
   1717         return -1;
   1718     }
   1719 
   1720     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
   1721         _Py_BEGIN_SUPPRESS_IPH
   1722         close(fd);
   1723         _Py_END_SUPPRESS_IPH
   1724         return -1;
   1725     }
   1726 #endif
   1727     return fd;
   1728 }
   1729 
   1730 #ifndef MS_WINDOWS
   1731 /* Get the blocking mode of the file descriptor.
   1732    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
   1733    raise an exception and return -1 on error. */
   1734 int
   1735 _Py_get_blocking(int fd)
   1736 {
   1737     int flags;
   1738     _Py_BEGIN_SUPPRESS_IPH
   1739     flags = fcntl(fd, F_GETFL, 0);
   1740     _Py_END_SUPPRESS_IPH
   1741     if (flags < 0) {
   1742         PyErr_SetFromErrno(PyExc_OSError);
   1743         return -1;
   1744     }
   1745 
   1746     return !(flags & O_NONBLOCK);
   1747 }
   1748 
   1749 /* Set the blocking mode of the specified file descriptor.
   1750 
   1751    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
   1752    otherwise.
   1753 
   1754    Return 0 on success, raise an exception and return -1 on error. */
   1755 int
   1756 _Py_set_blocking(int fd, int blocking)
   1757 {
   1758 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
   1759     int arg = !blocking;
   1760     if (ioctl(fd, FIONBIO, &arg) < 0)
   1761         goto error;
   1762 #else
   1763     int flags, res;
   1764 
   1765     _Py_BEGIN_SUPPRESS_IPH
   1766     flags = fcntl(fd, F_GETFL, 0);
   1767     if (flags >= 0) {
   1768         if (blocking)
   1769             flags = flags & (~O_NONBLOCK);
   1770         else
   1771             flags = flags | O_NONBLOCK;
   1772 
   1773         res = fcntl(fd, F_SETFL, flags);
   1774     } else {
   1775         res = -1;
   1776     }
   1777     _Py_END_SUPPRESS_IPH
   1778 
   1779     if (res < 0)
   1780         goto error;
   1781 #endif
   1782     return 0;
   1783 
   1784 error:
   1785     PyErr_SetFromErrno(PyExc_OSError);
   1786     return -1;
   1787 }
   1788 #endif
   1789 
   1790 
   1791 int
   1792 _Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
   1793                          const char **grouping)
   1794 {
   1795     int res = -1;
   1796 
   1797     struct lconv *lc = localeconv();
   1798 
   1799     int change_locale = 0;
   1800     if (decimal_point != NULL &&
   1801         (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
   1802     {
   1803         change_locale = 1;
   1804     }
   1805     if (thousands_sep != NULL &&
   1806         (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
   1807     {
   1808         change_locale = 1;
   1809     }
   1810 
   1811     /* Keep a copy of the LC_CTYPE locale */
   1812     char *oldloc = NULL, *loc = NULL;
   1813     if (change_locale) {
   1814         oldloc = setlocale(LC_CTYPE, NULL);
   1815         if (!oldloc) {
   1816             PyErr_SetString(PyExc_RuntimeWarning, "failed to get LC_CTYPE locale");
   1817             return -1;
   1818         }
   1819 
   1820         oldloc = _PyMem_Strdup(oldloc);
   1821         if (!oldloc) {
   1822             PyErr_NoMemory();
   1823             return -1;
   1824         }
   1825 
   1826         loc = setlocale(LC_NUMERIC, NULL);
   1827         if (loc != NULL && strcmp(loc, oldloc) == 0) {
   1828             loc = NULL;
   1829         }
   1830 
   1831         if (loc != NULL) {
   1832             /* Only set the locale temporarily the LC_CTYPE locale
   1833                if LC_NUMERIC locale is different than LC_CTYPE locale and
   1834                decimal_point and/or thousands_sep are non-ASCII or longer than
   1835                1 byte */
   1836             setlocale(LC_CTYPE, loc);
   1837         }
   1838     }
   1839 
   1840     if (decimal_point != NULL) {
   1841         *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
   1842         if (*decimal_point == NULL) {
   1843             goto error;
   1844         }
   1845     }
   1846     if (thousands_sep != NULL) {
   1847         *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
   1848         if (*thousands_sep == NULL) {
   1849             goto error;
   1850         }
   1851     }
   1852 
   1853     if (grouping != NULL) {
   1854         *grouping = lc->grouping;
   1855     }
   1856 
   1857     res = 0;
   1858 
   1859 error:
   1860     if (loc != NULL) {
   1861         setlocale(LC_CTYPE, oldloc);
   1862     }
   1863     PyMem_Free(oldloc);
   1864     return res;
   1865 }
   1866