Lines Matching refs:Charset
20 from email.charset import Charset
30 USASCII = Charset('us-ascii')
31 UTF8 = Charset('utf-8')
33 # Match encoded-word strings in the form =?charset?q?Hello_World?=
36 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
64 """Decode a message header value without converting charset.
66 Returns a list of (decoded_string, charset) pairs containing each of the
67 decoded parts of the header. Charset is None for non-encoded parts of the
95 charset, encoding = [s.lower() for s in parts[0:2]]
114 if decoded and decoded[-1][1] == charset:
117 decoded.append((dec, charset))
129 pairs of the format (decoded_string, charset) where charset is the string
138 for s, charset in decoded_seq:
140 if charset is not None and not isinstance(charset, Charset):
141 charset = Charset(charset)
142 h.append(s, charset)
149 def __init__(self, s=None, charset=None,
159 Optional charset serves two purposes: it has the same meaning as the
160 charset argument to the .append() method. It also sets the default
161 character set for all subsequent .append() calls that omit the charset
162 argument. If charset is not provided in the constructor, the us-ascii
163 charset is used both as s's initial charset and as the default for
177 if charset is None:
178 charset = USASCII
179 if not isinstance(charset, Charset):
180 charset = Charset(charset)
181 self._charset = charset
187 self.append(s, charset, errors)
210 for s, charset in self._chunks:
213 # from a charset to None/us-ascii, or from None/us-ascii to a
214 # charset. Only do this for the second and subsequent chunks.
215 nextcs = charset
224 uchunks.append(unicode(s, str(charset)))
237 def append(self, s, charset=None, errors='strict'):
240 Optional charset, if given, should be a Charset instance or the name
241 of a character set (which will be converted to a Charset instance). A
242 value of None (the default) means that the charset given in the
246 (i.e. isinstance(s, str) is true), then charset is the encoding of
248 cannot be decoded with that charset. If s is a Unicode string, then
249 charset is a hint specifying the character set of the characters in
252 following charsets in order: us-ascii, the charset hint, utf-8. The
258 if charset is None:
259 charset = self._charset
260 elif not isinstance(charset, Charset):
261 charset = Charset(charset)
262 # If the charset is our faux 8bit charset, leave the string unchanged
263 if charset != '8bit':
266 # charset.
269 # converted to a unicode with the input codec of the charset.
270 incodec = charset.input_codec or 'us-ascii'
275 outcodec = charset.output_codec or 'us-ascii'
281 for charset in USASCII, charset, UTF8:
283 outcodec = charset.output_codec or 'us-ascii'
290 self._chunks.append((s, charset))
292 def _split(self, s, charset, maxlinelen, splitchars):
294 splittable = charset.to_splittable(s)
295 encoded = charset.from_splittable(splittable, True)
296 elen = charset.encoded_header_len(encoded)
299 return [(encoded, charset)]
306 if charset == '8bit':
307 return [(s, charset)]
316 # For now, I can only imagine doing this when the charset is us-ascii,
319 elif charset == 'us-ascii':
320 return self._split_ascii(s, charset, maxlinelen, splitchars)
326 first = charset.from_splittable(splittable[:splitpnt], False)
327 last = charset.from_splittable(splittable[splitpnt:], False)
330 first, last = _binsplit(splittable, charset, maxlinelen)
333 fsplittable = charset.to_splittable(first)
334 fencoded = charset.from_splittable(fsplittable, True)
335 chunk = [(fencoded, charset)]
336 return chunk + self._split(last, charset, self._maxlinelen, splitchars)
338 def _split_ascii(self, s, charset, firstlen, splitchars):
341 return zip(chunks, [charset]*len(chunks))
346 # Given a list of pairs (string, charset), return a MIME-encoded
362 for header, charset in newchunks:
365 if charset is None or charset.header_encoding is None:
368 s = charset.header_encode(header)
393 If the given charset is not known or an error occurs during
403 for s, charset in self._chunks:
408 if targetlen < charset.encoded_header_len(''):
411 newchunks += self._split(s, charset, targetlen, splitchars)
494 def _binsplit(splittable, charset, maxlinelen):
507 chunk = charset.from_splittable(splittable[:m], True)
508 chunklen = charset.encoded_header_len(chunk)
518 first = charset.from_splittable(splittable[:i], False)
519 last = charset.from_splittable(splittable[i:], False)