Lines Matching refs:encoding
36 the encoding of an HTML or XML document, and converting it to
107 def sob(unicode, encoding):
108 """Returns either the given Unicode string or its encoding."""
109 if encoding is None:
112 return unicode.encode(encoding)
381 def substituteEncoding(self, str, encoding=None):
382 encoding = encoding or "utf-8"
383 return str.replace("%SOUP-ENCODING%", encoding)
385 def toEncoding(self, s, encoding=None):
386 """Encodes an object to a string in some encoding, or to Unicode.
389 if encoding:
390 s = s.encode(encoding)
392 if encoding:
393 s = s.encode(encoding)
397 if encoding:
398 s = self.toEncoding(str(s), encoding)
409 the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
429 def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):
430 return self.decode().encode(encoding)
444 if u'%SOUP-ENCODING%' in output:
615 def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
617 return self.decode(eventualEncoding=encoding)
634 def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
636 return self.decode(prettyPrint, indentLevel, encoding).encode(encoding)
641 its contents. To get Unicode, pass None for encoding."""
650 and '%SOUP-ENCODING%' in val):
733 def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
734 return self.encode(encoding, True)
736 def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
738 return self.decodeContents(prettyPrint, indentLevel).encode(encoding)
743 encoding. If encoding is None, returns a Unicode string.."""
802 def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
804 if encoding is None:
805 return self.decodeContents(prettyPrint, indentLevel, encoding)
807 return self.encodeContents(encoding, prettyPrint, indentLevel)
1034 object, possibly one with a %SOUP-ENCODING% slot into which an
1035 encoding will be plugged later."""
1037 text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
1576 # An HTML encoding was sniffed while converting
1577 # the document to Unicode, or an HTML encoding was
1579 # document, or an encoding was specified
1582 return match.group(1) + "%SOUP-ENCODING%"
1589 # Go through it again with the encoding information.
1645 META tags may contain encoding information, and so on.
1710 # reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi
1735 """A class for detecting the encoding of a *ML document and
1736 converting it to a Unicode string. If the source encoding is
1770 u = self._convertFrom(chardet.detect(self.markup)['encoding'])
1802 # Convert smart quotes to HTML if coming from an encoding
1820 #print "Correct encoding: %s" % proposed
1823 def _toUnicode(self, data, encoding):
1824 '''Given a string and its encoding, decodes the string into Unicode.
1825 %encoding is a string recognized by encodings.aliases'''
1830 encoding = 'utf-16be'
1834 encoding = 'utf-16le'
1837 encoding = 'utf-8'
1840 encoding = 'utf-32be'
1843 encoding = 'utf-32le'
1845 newdata = unicode(data, encoding)
1849 """Given a document, tries to detect its XML encoding."""
1898 xml_encoding_re = '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode()